Integration free kernels for equivariant Gaussian process modelling

Anonymous author

2024-10-02

library(progress)
library(doParallel)
library(pracma)
library(proxy)
library(nloptr)
library(RColorBrewer)
library(rgl)
notcluster=1
lr=.01;max_iter=1000
QUICK_RUN=0
jit=1e-6

Experiment 1: Synthetic rotation equivariant data

Covariance matrices

# Squared exponential kernel

cov_mat=function(x1,x2,sigma1,sigma2,l1,l2){
  norm=function(x){sum(x^2)^(.5)}

  n1=ifelse(length(as.matrix(x1))==2,1,nrow(x1))
  n2=ifelse(length(as.matrix(x2))==2,1,nrow(x2))
  x1=as.matrix(x1);x2=as.matrix(x2)
  if(n1==1){
    if(n2==1){
      dist_mat=norm(x1-x2)
    }else{dist_mat=distmat(t(x1),x2)
    }
  }else{
    dist_mat=distmat(x1,x2)
  }
  
  cov=matrix(0,nrow=2*n1,ncol=2*n2)
  cov[1:n1,1:n2]=sigma1^2*exp(-.5*dist_mat^2*(l1^2))
  cov[(n1+1):(2*n1),(n2+1):(2*n2)]=sigma2^2*exp(-.5*dist_mat^2*(l2^2))
  cov[(n1+1):(2*n1),1:n2]=cov[1:n1,(n2+1):(2*n2)]=0
  
  
  return(cov)
}
#Helmholtz kernel

cov_mat_helm=function(x1,x2,l1,sigma1,l2,sigma2){
  norm=function(x){sum(x^2)^(.5)}

  x1=as.matrix(x1);x2=as.matrix(x2)
  dist_mat=distmat(x1,x2)
  diff_mat=outer(x1[, 1], x2[, 1], "-") * outer(x1[, 2], x2[, 2], "-")
  dist_mat_x1=distmat(cbind(x1[,1],0),cbind(x2[,1],0))
  dist_mat_x2=distmat(cbind(x1[,2],0),cbind(x2[,2],0))
  
  n1=nrow(x1)
  n2=nrow(x2)
  
  cov=matrix(0,nrow=2*n1,ncol=2*n2)
  
  cov[1:n1,1:n2]=sigma1^2/((l1)^4)*exp(-.5*dist_mat^2/(l1^2))*(l1^2-dist_mat_x1^2)+
    sigma2^2/((l2)^4)*exp(-.5*dist_mat^2/(l2^2))*(l2^2-dist_mat_x2^2)
  
  
  cov[(n1+1):(2*n1),(n2+1):(2*n2)]=sigma1^2/((l1)^4)*exp(-.5*dist_mat^2/(l1^2))*
    (l1^2-dist_mat_x2^2)+
    sigma2^2/((l2)^4)*exp(-.5*dist_mat^2/(l2^2))*(l2^2-dist_mat_x1^2)
  
  cov[(n1+1):(2*n1),1:n2]=diff_mat*(-exp(-.5*dist_mat^2/(l1^2))*sigma1^2/((l1)^4)+
                                      sigma2^2/((l2)^4)*exp(-.5*dist_mat^2/(l2^2)) )
  
  
  cov[1:n1,(n2+1):(2*n2)]=cov[(n1+1):(2*n1),1:n2]
  
  
  return(cov)
}

# Fundamental domain equivariant kernel

fund_cov_mat= function(x1, x2, sigma1,sigma2,l1,l2) {
  norm=function(x){sum(x^2)^(.5)}

  x1=as.matrix(x1);x2=as.matrix(x2)
  cov = matrix(0, ncol =2 * ifelse(length(as.matrix(x2)) == 2, 1,nrow(x2)),
               nrow = 2 * ifelse(length(as.matrix(x1)) == 2, 1, nrow(x1)))
  
  for(i in 1:(length(as.matrix(x1))/2)){
    #if(i%%100==0){print(i)}
    theta1=ifelse(length(as.matrix(x1))==2,atan2(x1[2],x1[1]),
                  atan2(x1[i,2],x1[i,1]))
    for (j in 1:(length(as.matrix(x2))/2)){
      r1=ifelse(length(as.matrix(x1))==2,sum((x1)^2)^.5,
                sum((x1[i,])^2)^.5)
      r2=ifelse(length(as.matrix(x2))==2,sum((x2)^2)^.5,
                sum((x2[j,])^2)^.5)
      #dist=abs(x1[i,1]-x2[j,1])+abs(x1[i,2]-x2[j,2])
      theta2=ifelse(length(as.matrix(x2))==2,atan2(x2[2],x2[1]),
                    atan2(x2[j,2],x2[j,1]))
      cov_standard=cov_mat(c((r1),0),c((r2),0),
                           sigma1,sigma2,l1,l2)
      
      results=cbind(c(cos(theta1),sin(theta1)),c(-sin(theta1),cos(theta1)))%*%
        cov_standard %*%cbind(c(cos(theta2),-sin(theta2)),
                              c(sin(theta2),cos(theta2)))
      
      cov[i, j] = results[1,1]
      cov[ifelse(length(as.matrix(x1))==2,1,nrow(x1)) + i,
          ifelse(length(as.matrix(x2))==2,1,nrow(x2)) + j] = results[2,2]
      cov[ifelse(length(as.matrix(x1))==2,1,nrow(x1)) + i, j] = results[2,1]
      cov[i,ifelse(length(as.matrix(x2))==2,1,nrow(x2)) + j] = results[1,2]
    }
  }
  return(cov)
  
  
}
# Double integration equivariant kernel 

eq_cov_mat = function(x1, x2, l1, sigma1, l2, sigma2, maxEval = 1000) {
  norm=function(x){sum(x^2)^(.5)}

  cov = matrix(0, ncol = 2 * ifelse(length(as.matrix(x2)) == 2, 1, nrow(x2)),
                   nrow = 2 * ifelse(length(as.matrix(x1)) == 2, 1, nrow(x1)))

      for(i in 1:(length(as.matrix(x1))/2)){
        #if(i%%100==0){print(i)}
        for (j in 1:(length(as.matrix(x2))/2)){
          if(length(as.matrix(x1)) == 2){
            repr1 = function(theta1,theta2) {
              sigma1^2*exp(-0.5*sum((cbind(c(cos(theta1), sin(theta1)), c(-sin(theta1), cos(theta1)))%*%as.numeric(x1)-
                                       cbind(c(cos(theta2), sin(theta2)), c(-sin(theta2), cos(theta2)))%*%as.numeric(x2))^2)*(l1^2))
            }

            repr2 = function(theta1,theta2) {
              sigma2^2*exp(-0.5*sum((cbind(c(cos(theta1), sin(theta1)), c(-sin(theta1), cos(theta1)))%*%as.numeric(x1)-
                                       cbind(c(cos(theta2), sin(theta2)), c(-sin(theta2), cos(theta2)))%*%as.numeric(x2))^2)*(l2^2))
            }
          }else{
            repr1 = function(theta1,theta2) {
              sigma1^2*exp(-0.5*sum((cbind(c(cos(theta1), sin(theta1)), c(-sin(theta1), cos(theta1)))%*%as.numeric(x1[i,])-
                                       cbind(c(cos(theta2), sin(theta2)), c(-sin(theta2), cos(theta2)))%*%as.numeric(x2[j,]))^2)*(l1^2))
            }

            repr2 = function(theta1,theta2) {
              sigma2^2*exp(-0.5*sum((cbind(c(cos(theta1), sin(theta1)), c(-sin(theta1), cos(theta1)))%*%as.numeric(x1[i,])-
                                       cbind(c(cos(theta2), sin(theta2)), c(-sin(theta2), cos(theta2)))%*%as.numeric(x2[j,]))^2)*(l2^2))
            }
          }

          integrand1 = function(theta) {
            theta1=theta[1]
            theta2=theta[2]
            repr1(theta1,theta2)*cos(theta1)*cos(theta2)+ repr2(theta1,theta2)*sin(theta1)*sin(theta2)
          }

          integrand2 = function(theta) {

            theta1=theta[1]
            theta2=theta[2]
            -repr1(theta1,theta2)*cos(theta1)*sin(theta2)+ repr2(theta1,theta2)*sin(theta1)*cos(theta2)
          }

          integrand3 = function(theta) {
            theta1=theta[1]
            theta2=theta[2]
            -repr1(theta1,theta2)*sin(theta1)*cos(theta2)+ repr2(theta1,theta2)*sin(theta2)*cos(theta1)
          }

          integrand4 = function(theta) {
            theta1=theta[1]
            theta2=theta[2]
            repr2(theta1,theta2)*cos(theta1)*cos(theta2)+ repr1(theta1,theta2)*sin(theta1)*sin(theta2)
          }
          fun=list(integrand1,integrand2,integrand3,integrand4)

          results=foreach(l=1:4,.packages = c("cubature"), .combine = cbind,
                          .export = c("l1", "l2", "sigma1", "sigma2",
                                      "repr1", "repr2", "integrand1",
                                      "integrand2", "integrand3", "integrand4"))%dopar%{
                                        adaptIntegrate(fun[[l]], lower=c(0,0), upper=c(2 * pi,2*pi), maxEval = maxEval)$integral
                                      }

               cov[i, j] = results[1]
          cov[nrow(x1) + i, nrow(x2) + j] = results[4]
          cov[nrow(x1) + i, j] = results[3]
          cov[i, nrow(x2) + j] = results[2]
        }
      }

      return(cov)


}

Likelihood functions

log_likelihood=function(ytr,xtr,sigma1,sigma2,tau,l1,l2){
  if(ncol(ytr)==2){
    ytr=c(ytr[,1],ytr[,2])
  }
  
  Ktr=cov_mat(xtr,xtr,sigma1,sigma2,l1,l2)+
    tau^2*diag(nrow = 2*nrow(xtr))
  
  
  ll=-0.5*t(ytr)%*%inv(Ktr)%*%ytr-0.5*determinant(Ktr,logarithm=1)$modulus-
    nrow(xtr)*log(2*pi)
  ll=ifelse(is.nan(ll),10^6,-ll)
  #ll=ifelse(ll>0,ll,1e6)
  #print(c(ll))
  return(ll)
  
  
}


log_likelihood_helm=function(ytr,xtr,l1,sigma1,l2,sigma2,sigma_obs){
  if(ncol(ytr)==2){
    ytr=c(ytr[,1],ytr[,2])
  }
  
  Ktr=cov_mat_helm(xtr,xtr,l1,sigma1,l2,sigma2)+sigma_obs^2*
    diag(nrow=2*nrow(xtr))
  (ll=-.5*((ytr)%*%inv(Ktr)%*%ytr)-.5*determinant(Ktr,logarithm=1)$modulus-
      nrow(xtr)*log(2*pi))
  ll=ifelse(is.na(ll),1e6,-ll)
  #print(ll)
  return(ll)
}

log_likelihood_fund=function(ytr,xtr,sigma1,sigma2,tau,l1,l2){
  if(ncol(ytr)==2){
    ytr=c(ytr[,1],ytr[,2])
  }
  
  Ktr=fund_cov_mat(xtr,xtr,sigma1,sigma2,l1,l2)+
    tau^2*diag(nrow = 2*nrow(xtr))
  
  
  ll=-0.5*t(ytr)%*%inv(Ktr)%*%ytr-0.5*determinant(Ktr,logarithm=1)$modulus-
    nrow(xtr)*log(2*pi)
  ll=ifelse(is.nan(ll),10^6,-ll)
  #ll=ifelse(ll>0,ll,1e6)
  #print(c(ll))
  return(ll)
  
  
}

log_likelihood_eq=function(ytr,xtr,l1,sigma1,l2,sigma2,sigma_obs,
                             maxEval=1000){
  if(ncol(ytr)==2){
    ytr=c(ytr[,1],ytr[,2])
  }

  Ktr=eq_cov_mat(xtr,xtr,l1,sigma1,l2,sigma2,maxEval = maxEval)+
        sigma_obs^2*diag(nrow=2*nrow(xtr))

  ll=-0.5*sum((ytr)*solve(Ktr,ytr))-0.5*log(det(Ktr))-nrow(xtr)*log(2*pi)
  ll=ifelse(is.nan(ll),10^6,-ll)
  ll=ifelse(ll>0,ll,1e6)
  #print(c(ll))
  return(ll)


}

Likelihood gradients

norm=function(x){sum(x^2)^.5}
del_l_1=function(x1,x2,sigma1,sigma2,l1,l2){
  
  
  n1=ifelse(length(as.matrix(x1))==2,1,nrow(x1))
  n2=ifelse(length(as.matrix(x2))==2,1,nrow(x2))
  if(n1==1){
    if(n2==1){
      dist_mat=norm(x1-x2)
    }else{dist_mat=distmat(t(x1),x2)
    }
  }else{
    dist_mat=distmat(x1,x2) #needs pracma library
  }
  
  del_l1=matrix(0,nrow=2*n1,ncol=2*n2)
  del_l1[1:n1,1:n2]=sigma1^2*exp(-.5*dist_mat^2*(l1^2))*(-l1*dist_mat^2)
  del_l1[(n1+1):(2*n1),1:n2]=del_l1[(1):n1,(n2+1):(2*n2)]=0
  
  del_l1[(n1+1):(2*n1),(n2+1):(2*n2)]=0
  return(del_l1)
}

del_l_2=function(x1,x2,sigma1,sigma2,l1,l2){
  
  
  n1=ifelse(length(as.matrix(x1))==2,1,nrow(x1))
  n2=ifelse(length(as.matrix(x2))==2,1,nrow(x2))
  if(n1==1){
    if(n2==1){
      dist_mat=norm(x1-x2)
    }else{dist_mat=distmat(t(x1),x2)
    }
  }else{
    dist_mat=distmat(x1,x2) #needs pracma library
  }
  
  del_l2=matrix(0,nrow=2*n1,ncol=2*n2)
  del_l2[1:n1,1:n2]=0
  del_l2[(n1+1):(2*n1),1:n2]=del_l2[(1):n1,(n2+1):(2*n2)]=0
  del_l2[(n1+1):(2*n1),(n2+1):(2*n2)]=sigma2^2*exp(-.5*dist_mat^2*(l2^2))*(-l2*dist_mat^2)
  return(del_l2)
}

del_sig1=function(x1,x2,sigma1,sigma2,l1,l2){
  
  n1=ifelse(length(as.matrix(x1))==2,1,nrow(x1))
  n2=ifelse(length(as.matrix(x2))==2,1,nrow(x2))
  if(n1==1){
    if(n2==1){
      dist_mat=norm(x1-x2)
    }else{dist_mat=distmat(t(x1),x2)
    }
  }else{
    dist_mat=distmat(x1,x2) #needs pracma library
  }
  
  del_sigma1=matrix(0,nrow=2*n1,ncol=2*n2)
  del_sigma1[1:n1,1:n2]=2*sigma1*exp(-.5*dist_mat^2*(l1^2))
  del_sigma1[(n1+1):(2*n1),1:n2]=0
  del_sigma1[(n1+1):(2*n1),(n2+1):(2*n2)]=0
  return(del_sigma1)
}

del_sig2=function(x1,x2,sigma1,sigma2,l1,l2){
  
  
  n1=ifelse(length(as.matrix(x1))==2,1,nrow(x1))
  n2=ifelse(length(as.matrix(x2))==2,1,nrow(x2))
  if(n1==1){
    if(n2==1){
      dist_mat=norm(x1-x2)
    }else{dist_mat=distmat(t(x1),x2)
    }
  }else{
    dist_mat=distmat(x1,x2) #needs pracma library
  }
  
  del_sigma2=matrix(0,nrow=2*n1,ncol=2*n2)
  del_sigma2[1:n1,1:n2]=0
  del_sigma2[(n1+1):(2*n1),1:n2]=del_sigma2[(1):n1,(n2+1):(2*n2)]=0
  del_sigma2[(n1+1):(2*n1),(n2+1):(2*n2)]=2*sigma2*exp(-.5*dist_mat^2*(l2^2))
  return(del_sigma2)
}


grad_fund=function(xtr,ytr,sigma1,sigma2,tau,l1,l2){
  x1=x2=xtr
  K=fund_cov_mat(xtr,xtr,sigma1,sigma2,l1,l2)+
    tau^2*diag(nrow = 2*nrow(xtr))
  
  
  n1=ifelse(length(as.matrix(xtr))==2,1,nrow(xtr))
  n2=ifelse(length(as.matrix(xtr))==2,1,nrow(xtr))
  
  cov = matrix(0, ncol =2 * ifelse(length(as.matrix(x2)) == 2, 1,nrow(x2)),
               nrow = 2 * ifelse(length(as.matrix(x1)) == 2, 1, nrow(x1)))
  
  del_sigma1=del_sigma2=del_l1=del_l2=cov
  
  for(i in 1:(length(as.matrix(x1))/2)){
    #if(i%%100==0){print(i)}
    theta1=ifelse(length(as.matrix(x1))==2,atan2(x1[2],x1[1]),
                  atan2(x1[i,2],x1[i,1]))
    for (j in 1:(length(as.matrix(x2))/2)){
      r1=ifelse(length(as.matrix(x1))==2,sum((x1)^2)^.5,
                sum((x1[i,])^2)^.5)
      r2=ifelse(length(as.matrix(x2))==2,sum((x2)^2)^.5,
                sum((x2[j,])^2)^.5)
      #dist=abs(x1[i,1]-x2[j,1])+abs(x1[i,2]-x2[j,2])
      theta2=ifelse(length(as.matrix(x2))==2,atan2(x2[2],x2[1]),
                    atan2(x2[j,2],x2[j,1]))
      cov_standard=cov_mat(c((r1),0),c((r2),0),
                           sigma1,sigma2,l1,l2)
      
      results=cbind(c(cos(theta1),sin(theta1)),c(-sin(theta1),cos(theta1)))%*%
        cov_standard %*%cbind(c(cos(theta2),-sin(theta2)),
                              c(sin(theta2),cos(theta2)))
      
      cov[i, j] = results[1,1]
      cov[ifelse(length(as.matrix(x1))==2,1,nrow(x1)) + i,
          ifelse(length(as.matrix(x2))==2,1,nrow(x2)) + j] = results[2,2]
      cov[ifelse(length(as.matrix(x1))==2,1,nrow(x1)) + i, j] = results[2,1]
      cov[i,ifelse(length(as.matrix(x2))==2,1,nrow(x2)) + j] = results[1,2]
      
      
      results_sigma1=cbind(c(cos(theta1),sin(theta1)),c(-sin(theta1),cos(theta1)))%*%
        del_sig1(c(r1,0),c(r2,0),sigma1,sigma2,l1,l2)%*%
        cbind(c(cos(theta2),-sin(theta2)),c(sin(theta2),cos(theta2)))
      
      
      results_sigma2=cbind(c(cos(theta1),sin(theta1)),c(-sin(theta1),cos(theta1)))%*%
        del_sig2(c(r1,0),c(r2,0),sigma1,sigma2,l1,l2)%*%
        cbind(c(cos(theta2),-sin(theta2)),c(sin(theta2),cos(theta2)))
      
      
      
      results_l1=cbind(c(cos(theta1),sin(theta1)),c(-sin(theta1),cos(theta1)))%*%
        del_l_1(c(r1,0),c(r2,0),sigma1,sigma2,l1,l2)%*%
        cbind(c(cos(theta2),-sin(theta2)),c(sin(theta2),cos(theta2)))
      
      results_l2=cbind(c(cos(theta1),sin(theta1)),c(-sin(theta1),cos(theta1)))%*%
        del_l_2(c(r1,0),c(r2,0),sigma1,sigma2,l1,l2)%*%
        cbind(c(cos(theta2),-sin(theta2)),c(sin(theta2),cos(theta2)))
      
      
      
      del_sigma1[i, j] = results_sigma1[1,1]
      del_sigma1[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i,
                 ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_sigma1[2,2]
      del_sigma1[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i, j] = results_sigma1[2,1]
      del_sigma1[i,ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_sigma1[1,2]
      
      
      
      del_sigma2[i, j] = results_sigma2[1,1]
      del_sigma2[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i,
                 ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_sigma2[2,2]
      del_sigma2[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i, j] = results_sigma2[2,1]
      del_sigma2[i,ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_sigma2[1,2]
      
      
      
      del_l1[i, j] = results_l1[1,1]
      del_l1[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i,
             ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_l1[2,2]
      del_l1[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i, j] = results_l1[2,1]
      del_l1[i,ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_l1[1,2]
      
      del_l2[i, j] = results_l2[1,1]
      del_l2[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i,
             ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_l2[2,2]
      del_l2[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i, j] = results_l2[2,1]
      del_l2[i,ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_l2[1,2]
      
      
      
    }
  }
  inv_K=inv(K)
  alpha=1
  grad=.5*c(-t(c(ytr[,1],ytr[,2]))%*%inv_K%*%(del_sigma1)%*%inv_K%*%c(ytr[,1],ytr[,2])+Trace(inv_K%*%(alpha*del_sigma1)),
            -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%(del_sigma2)%*%inv_K%*%c(ytr[,1],ytr[,2])+Trace(inv_K%*%(alpha*del_sigma2)),
            -t(c(ytr[,1],ytr[,2]))%*%(2*tau*inv_K)%*%inv_K%*%c(ytr[,1],ytr[,2])+Trace(2*tau*inv_K),
            -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%(del_l1)%*%inv_K%*%(c(ytr[,1],ytr[,2]))+Trace(inv_K%*%(alpha*del_l1)),
            -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%(del_l2)%*%inv_K%*%(c(ytr[,1],ytr[,2]))+Trace(inv_K%*%(alpha*del_l2))
  )
  #print(grad)
  return(grad)
  
  
}

grad_standard=function(xtr,ytr,sigma1,sigma2,tau,l1,l2){
  x1=x2=xtr
  K=cov_mat(xtr,xtr,sigma1,sigma2,l1,l2)+
    tau^2*diag(nrow = 2*nrow(xtr))
  
  
  n1=ifelse(length(as.matrix(xtr))==2,1,nrow(xtr))
  n2=ifelse(length(as.matrix(xtr))==2,1,nrow(xtr))
  if(n1==1){
    if(n2==1){
      dist_mat=norm(x1-x2)
    }else{dist_mat=distmat(t(x1),x2)
    }
  }else{
    dist_mat=distmat(x1,x2) #needs pracma library
  }
  
  
  
  del_sigma1=matrix(0,nrow=2*n1,ncol=2*n2)
  del_sigma1[1:n1,1:n2]=2*sigma1*exp(-.5*dist_mat^2*(l1^2))
  del_sigma1[(n1+1):(2*n1),1:n2]=del_sigma1[(1):n1,(n2+1):(2*n2)]=0
  del_sigma1[(n1+1):(2*n1),(n2+1):(2*n2)]=0
  
  
  del_sigma2=matrix(0,nrow=2*n1,ncol=2*n2)
  del_sigma2[1:n1,1:n2]=0
  del_sigma2[(n1+1):(2*n1),1:n2]=del_sigma2[(1):n1,(n2+1):(2*n2)]=0
  del_sigma2[(n1+1):(2*n1),(n2+1):(2*n2)]=2*sigma2*exp(-.5*dist_mat^2*(l2^2))
  
  
  
  del_tau=2*tau*diag(nrow=2*nrow(xtr))
  
  
  
  del_l1=matrix(0,nrow=2*n1,ncol=2*n2)
  del_l1[1:n1,1:n2]=sigma1^2*exp(-.5*dist_mat^2*(l1^2))*(-l1*dist_mat^2)
  del_l1[(n1+1):(2*n1),1:n2]=del_l1[(1):n1,(n2+1):(2*n2)]=0
  del_l1[(n1+1):(2*n1),(n2+1):(2*n2)]=0
  
  del_l2=matrix(0,nrow=2*n1,ncol=2*n2)
  del_l2[1:n1,1:n2]=0
  del_l2[(n1+1):(2*n1),1:n2]=del_l2[(1):n1,(n2+1):(2*n2)]=0
  del_l2[(n1+1):(2*n1),(n2+1):(2*n2)]=sigma2^2*exp(-.5*dist_mat^2*(l2^2))*(-l2*dist_mat^2)
  
  
  inv_K=inv(K)
  
  #sigma1,sigma2,sigma3,sigma4,nu1=1,nu2=1,tau,a1,a2
  
  grad=0.5*c(-t(c(ytr[,1],ytr[,2]))%*%inv_K%*%del_sigma1%*%inv_K%*%c(ytr[,1],ytr[,2])+Trace(inv_K%*%del_sigma1),
             -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%del_sigma2%*%inv_K%*%c(ytr[,1],ytr[,2])+Trace(inv_K%*%del_sigma2),
             -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%del_tau%*%inv_K%*%(c(ytr[,1],ytr[,2]))+Trace(inv_K%*%del_tau),
             -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%del_l1%*%inv_K%*%c(ytr[,1],ytr[,2])+Trace(inv_K%*%del_l1),
             -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%del_l2%*%inv_K%*%c(ytr[,1],ytr[,2])+Trace(inv_K%*%del_l2)
  )
  
  return(grad)
  
}

grad_helm=function(x1,ytr,l1,sigma1,l2,sigma2,sigma_obs_2){
  x1=x2=as.matrix(x1)
  dist_mat=distmat(x1,x2)
  diff_mat=outer(x1[, 1], x2[, 1], "-") * outer(x1[, 2], x2[, 2], "-")
  dist_mat_x1=distmat(cbind(x1[,1],0),cbind(x2[,1],0))
  dist_mat_x2=distmat(cbind(x1[,2],0),cbind(x2[,2],0))
  
  n1=nrow(x1)
  n2=nrow(x2)
  
  del_cov1=del_cov2=del_cov3=del_cov4=cov=matrix(0,nrow=2*n1,ncol=2*n2)
  
  
  del_cov1[1:n1,1:n2]=-4*sigma1^2/((l1)^5)*exp(-.5*dist_mat^2/(l1^2))*(l1^2-dist_mat_x1^2)+
    sigma1^2/((l1)^4)*exp(-.5*dist_mat^2/(l1^2))*((l1^2-dist_mat_x1^2)*dist_mat^2/(l1^3)+2*l1)
  
  del_cov1[(n1+1):(2*n1),(n2+1):(2*n2)]=-4*sigma1^2/((l1)^5)*exp(-.5*dist_mat^2/(l1^2))*(l1^2-dist_mat_x2^2)+
    sigma1^2/((l1)^4)*exp(-.5*dist_mat^2/(l1^2))*((l1^2-dist_mat_x2^2)*dist_mat^2/(l1^3)+2*l1)
  
  del_cov1[1:n1,(n2+1):(2*n2)]=del_cov1[(n1+1):(2*n1),1:n2]=
    -diff_mat*exp(-.5*dist_mat^2/(l1^2))*(dist_mat^2/(l1^3)*sigma1^2/((l1)^4)-4*sigma1^2/((l1)^5))
  
  del_cov2[1:n1,1:n2]=-4*sigma2^2/((l2)^5)*exp(-.5*dist_mat^2/(l2^2))*(l2^2-dist_mat_x2^2)+
    sigma2^2/((l2)^4)*exp(-.5*dist_mat^2/(l2^2))*((l2^2-dist_mat_x2^2)*dist_mat^2/(l2^3)+2*l2)
  
  del_cov2[(n1+1):(2*n1),(n2+1):(2*n2)]=-4*sigma2^2/((l2)^5)*exp(-.5*dist_mat^2/(l2^2))*(l2^2-dist_mat_x1^2)+
    sigma2^2/((l2)^4)*exp(-.5*dist_mat^2/(l2^2))*((l2^2-dist_mat_x1^2)*dist_mat^2/(l2^3)+2*l2)
  
  del_cov2[1:n1,(n2+1):(2*n2)]=del_cov2[(n1+1):(2*n1),1:n2]=
    diff_mat*exp(-.5*dist_mat^2/(l2^2))*(dist_mat^2/(l2^3)*sigma2^2/((l2)^4)-4*sigma2^2/((l2)^5))
  
  
  del_cov3[1:n1,1:n2]=2*sigma1/((l1)^4)*exp(-.5*dist_mat^2/(l1^2))*(l1^2-dist_mat_x1^2)
  del_cov3[(n1+1):(2*n1),(n2+1):(2*n2)]=2*sigma1/((l1)^4)*exp(-.5*dist_mat^2/(l1^2))*
    (l1^2-dist_mat_x2^2)
  del_cov3[1:n1,(n2+1):(2*n2)]=del_cov3[(n1+1):(2*n1),1:n2]=
    diff_mat*(-exp(-.5*dist_mat^2/(l1^2))*2*sigma1/((l1)^4))
  
  del_cov4[1:n1,1:n2]=2*sigma2/((l2)^4)*exp(-.5*dist_mat^2/(l2^2))*(l2^2-dist_mat_x2^2)
  del_cov4[(n1+1):(2*n1),(n2+1):(2*n2)]=2*sigma2/((l2)^4)*exp(-.5*dist_mat^2/
                                                                (l2^2))*(l2^2-dist_mat_x1^2)
  del_cov4[1:n1,(n2+1):(2*n2)]=del_cov4[(n1+1):(2*n1),1:n2]=
    diff_mat*(2*sigma2/((l2)^4)*exp(-.5*dist_mat^2/(l2^2)) )
  
  cov[1:n1,1:n2]=sigma1^2/((l1)^4)*exp(-.5*dist_mat^2/(l1^2))*(l1^2-dist_mat_x1^2)+
    sigma2^2/((l2)^4)*exp(-.5*dist_mat^2/(l2^2))*(l2^2-dist_mat_x2^2)
  
  cov[(n1+1):(2*n1),(n2+1):(2*n2)]=sigma1^2/((l1)^4)*exp(-.5*dist_mat^2/(l1^2))*
    (l1^2-dist_mat_x2^2)+
    sigma2^2/((l2)^4)*exp(-.5*dist_mat^2/(l2^2))*(l2^2-dist_mat_x1^2)
  
  cov[(n1+1):(2*n1),1:n2]=diff_mat*(-exp(-.5*dist_mat^2/(l1^2))*sigma1^2/((l1)^4)+
                                      sigma2^2/((l2)^4)*exp(-.5*dist_mat^2/(l2^2)) )
  
  
  cov[1:n1,(n2+1):(2*n2)]=cov[(n1+1):(2*n1),1:n2]
  
  K=cov+diag(sigma_obs_2^2,nrow=2*nrow(x1))
  inv_K=inv(K)
  
  grad=.5*c(-t(c(ytr[,1],ytr[,2]))%*%inv_K%*%del_cov1%*%inv_K%*%c(ytr[,1],ytr[,2])+Trace(inv_K%*%del_cov1),
            -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%del_cov3%*%inv_K%*%(c(ytr[,1],ytr[,2]))+Trace(inv_K%*%del_cov3),
            -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%del_cov2%*%inv_K%*%c(ytr[,1],ytr[,2])+Trace(inv_K%*%del_cov2),
            -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%del_cov4%*%inv_K%*%(c(ytr[,1],ytr[,2]))+Trace(inv_K%*%del_cov4),
            -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%diag(2*sigma_obs_2,nrow=2*nrow(x1))%*%inv_K%*%c(ytr[,1],ytr[,2])+
              Trace(inv_K%*%diag(2*sigma_obs_2,nrow=2*nrow(x1))))
  
  
  return(grad)
}


grad_eq=function(xtr,ytr,l1,sigma1,l2,sigma2,sigma_obs,maxEval=1000){


  K=eq_cov_mat(xtr,xtr,l1,sigma1,l2,sigma2,maxEval = maxEval)+
    diag(sigma_obs^2,nrow=2*nrow(xtr))



  K_l1 = matrix(0, ncol =2 * ifelse(length(as.matrix(xtr)) == 2, 1, nrow(xtr)),
                  nrow = 2 * ifelse(length(as.matrix(xtr)) == 2, 1, nrow(xtr)))
  K_l2 = matrix(0, ncol =2 * ifelse(length(as.matrix(xtr)) == 2, 1, nrow(xtr)),
                  nrow = 2 * ifelse(length(as.matrix(xtr)) == 2, 1, nrow(xtr)))
  K_sigma1 = matrix(0, ncol =2 * ifelse(length(as.matrix(xtr)) == 2, 1, nrow(xtr)),
                      nrow = 2 * ifelse(length(as.matrix(xtr)) == 2, 1, nrow(xtr)))
  K_sigma2 = matrix(0, ncol =2 * ifelse(length(as.matrix(xtr)) == 2, 1, nrow(xtr)),
                      nrow = 2 * ifelse(length(as.matrix(xtr)) == 2, 1, nrow(xtr)))


  for(i in 1:(length(as.matrix(xtr))/2)){
    #if(i%%100==0){print(i)}
    for (j in 1:(length(as.matrix(xtr))/2)){

      repr1 = function(theta1,theta2) {
        sigma1^2*exp(-0.5*sum((cbind(c(cos(theta1), sin(theta1)),
                                       c(-sin(theta1), cos(theta1)))%*%
                                   as.numeric(xtr[i,])-
                                 cbind(c(cos(theta2), sin(theta2)),
                                       c(-sin(theta2), cos(theta2)))%*%
                                   as.numeric(xtr[j,]))^2)*(l1^2))
      }

      repr2 = function(theta1,theta2){
        sigma2^2*exp(-0.5*sum((cbind(c(cos(theta1), sin(theta1)),
                                       c(-sin(theta1), cos(theta1)))%*%
                                   as.numeric(xtr[i,])-
                                 cbind(c(cos(theta2), sin(theta2)),
                                       c(-sin(theta2), cos(theta2)))%*%
                                   as.numeric(xtr[j,]))^2)*(l2^2))

      }

      integrand1 = function(theta) {
        theta1=theta[1]
        theta2=theta[2]
        repr1(theta1,theta2)*cos(theta1)*cos(theta2)*
          sum((cbind(c(cos(theta1), sin(theta1)),
                     c(-sin(theta1), cos(theta1)))%*%as.numeric(xtr[i,])-
                 cbind(c(cos(theta2), sin(theta2)), c(-sin(theta2), cos(theta2)))%*%
                 as.numeric(xtr[j,]))^2)*(-l1)
      }

      integrand2 = function(theta) {

        theta1=theta[1]
        theta2=theta[2]
        -repr1(theta1,theta2)*cos(theta1)*sin(theta2)*
          sum((cbind(c(cos(theta1), sin(theta1)),
                     c(-sin(theta1), cos(theta1)))%*%as.numeric(xtr[i,])-
                 cbind(c(cos(theta2), sin(theta2)), c(-sin(theta2), cos(theta2)))%*%
                 as.numeric(xtr[j,]))^2)*(-l1)
      }
      
      integrand3 = function(theta) {
        theta1=theta[1]
        theta2=theta[2]
        -repr1(theta1,theta2)*sin(theta1)*cos(theta2)*
          sum((cbind(c(cos(theta1), sin(theta1)),
                     c(-sin(theta1), cos(theta1)))%*%as.numeric(xtr[i,])-
                 cbind(c(cos(theta2), sin(theta2)), c(-sin(theta2), cos(theta2)))%*%
                 as.numeric(xtr[j,]))^2)*(-l1)
      }

      integrand4 = function(theta) {
        theta1=theta[1]
        theta2=theta[2]
        repr1(theta1,theta2)*sin(theta1)*sin(theta2)*
          sum((cbind(c(cos(theta1), sin(theta1)),
                     c(-sin(theta1), cos(theta1)))%*%as.numeric(xtr[i,])-
                 cbind(c(cos(theta2), sin(theta2)), c(-sin(theta2), cos(theta2)))%*%
                 as.numeric(xtr[j,]))^2)*(-l1)
      }
      fun=list(integrand1,integrand2,integrand3,integrand4)

      results1=foreach(l=1:4,.packages = c("cubature"), .combine = cbind,
                       .export = c("l1", "l2", "sigma1", "sigma2",
                                   "repr1", "repr2", "integrand1","xtr","ytr",
                                   "integrand2", "integrand3", "integrand4"))%dopar%{
                                     adaptIntegrate(fun[[l]], lower=c(0,0),
                                                    upper=c(2 * pi,2*pi),
                                                    maxEval = maxEval)$integral
                                   }

      repr1 = function(theta1,theta2) {
        2*sigma1*exp(-0.5*sum((cbind(c(cos(theta1), sin(theta1)),
                                       c(-sin(theta1), cos(theta1)))%*%
                                   as.numeric(xtr[i,])-
                        cbind(c(cos(theta2), sin(theta2)),
                              c(-sin(theta2), cos(theta2)))%*%
                          as.numeric(xtr[j,]))^2)*(l1^2))
      }

      integrand1 = function(theta) {
        theta1=theta[1]
        theta2=theta[2]
        repr1(theta1,theta2)*cos(theta1)*cos(theta2)
      }

      integrand2 = function(theta) {

        theta1=theta[1]
        theta2=theta[2]
        -repr1(theta1,theta2)*cos(theta1)*sin(theta2)

      }

      integrand3 = function(theta) {
        theta1=theta[1]
        theta2=theta[2]
        -repr1(theta1,theta2)*sin(theta1)*cos(theta2)
      }

      integrand4 = function(theta) {
        theta1=theta[1]
        theta2=theta[2]
        repr1(theta1,theta2)*sin(theta1)*sin(theta2)
      }
      fun=list(integrand1,integrand2,integrand3,integrand4)
      results2=foreach(l=1:4,.packages = c("cubature"), .combine = cbind,
                       .export = c("l1", "l2", "sigma1", "sigma2",
                                   "repr1", "repr2", "integrand1","xtr","ytr",
                                   "integrand2", "integrand3", "integrand4"))%dopar%{
                                     adaptIntegrate(fun[[l]], lower=c(0,0),
                                                    upper=c(2 * pi,2*pi),
                                                    maxEval =maxEval)$integral
                                   }

      repr1 = function(theta1,theta2) {
        sigma1^2*exp(-0.5*sum((cbind(c(cos(theta1), sin(theta1)),
                                       c(-sin(theta1), cos(theta1)))%*%
                                   as.numeric(xtr[i,])-
                                 cbind(c(cos(theta2), sin(theta2)),
                                       c(-sin(theta2), cos(theta2)))%*%
                                   as.numeric(xtr[j,]))^2)*(l1^2))
      }

      repr2 = function(theta1,theta2){
        sigma2^2*exp(-0.5*sum((cbind(c(cos(theta1), sin(theta1)),
                                       c(-sin(theta1), cos(theta1)))%*%
                                   as.numeric(xtr[i,])-
                                 cbind(c(cos(theta2), sin(theta2)),
                                       c(-sin(theta2), cos(theta2)))%*%
                                   as.numeric(xtr[j,]))^2)*(l2^2))

      }

      integrand1 = function(theta) {
        theta1=theta[1]
        theta2=theta[2]
        repr2(theta1,theta2)*sin(theta1)*sin(theta2)*
          sum((cbind(c(cos(theta1), sin(theta1)),
                     c(-sin(theta1), cos(theta1)))%*%as.numeric(xtr[i,])-
                 cbind(c(cos(theta2), sin(theta2)), c(-sin(theta2), cos(theta2)))%*%
                 as.numeric(xtr[j,]))^2)*(-l2)
      }

      integrand2 = function(theta) {

        theta1=theta[1]
        theta2=theta[2]
        repr2(theta1,theta2)*cos(theta2)*sin(theta1)*
          sum((cbind(c(cos(theta1), sin(theta1)),
                     c(-sin(theta1), cos(theta1)))%*%as.numeric(xtr[i,])-
                 cbind(c(cos(theta2), sin(theta2)), c(-sin(theta2), cos(theta2)))%*%
                 as.numeric(xtr[j,]))^2)*(-l2)
      }

      integrand3 = function(theta) {
        theta1=theta[1]
        theta2=theta[2]
        repr2(theta1,theta2)*sin(theta2)*cos(theta1)*
          sum((cbind(c(cos(theta1), sin(theta1)),
                     c(-sin(theta1), cos(theta1)))%*%as.numeric(xtr[i,])-
                 cbind(c(cos(theta2), sin(theta2)), c(-sin(theta2), cos(theta2)))%*%
                 as.numeric(xtr[j,]))^2)*(-l2)
      }

      integrand4 = function(theta) {
        theta1=theta[1]
        theta2=theta[2]
        repr2(theta1,theta2)*cos(theta1)*cos(theta2)*
          sum((cbind(c(cos(theta1), sin(theta1)),
                     c(-sin(theta1), cos(theta1)))%*%as.numeric(xtr[i,])-          
                 cbind(c(cos(theta2), sin(theta2)), c(-sin(theta2), cos(theta2)))%*%
                 as.numeric(xtr[j,]))^2)*(-l2)
      }
      fun=list(integrand1,integrand2,integrand3,integrand4)

      results3=foreach(l=1:4,.packages = c("cubature"), .combine = cbind,
                       .export = c("l1", "l2", "sigma1", "sigma2",
                                   "repr1", "repr2", "integrand1","xtr","ytr",
                                   "integrand2", "integrand3", "integrand4"))%dopar%{
                                     adaptIntegrate(fun[[l]], lower=c(0,0),
                                                    upper=c(2 * pi,2*pi),
                                                    maxEval = maxEval)$integral
                                   }

      repr2 = function(theta1,theta2) {
        2*sigma2*exp(-0.5*sum((cbind(c(cos(theta1), sin(theta1)),
                                       c(-sin(theta1), cos(theta1)))%*%
                                   as.numeric(xtr[i,])-
                        cbind(c(cos(theta2), sin(theta2)),
                              c(-sin(theta2),cos(theta2)))%*%
                          as.numeric(xtr[j,]))^2)*(l2^2))
      }

      integrand1 = function(theta) {
        theta1=theta[1]
        theta2=theta[2]
        repr2(theta1,theta2)*sin(theta1)*sin(theta2)
      }

      integrand2 = function(theta) {

        theta1=theta[1]
        theta2=theta[2]
        repr2(theta1,theta2)*sin(theta1)*cos(theta2)

      }

      integrand3 = function(theta) {
        theta1=theta[1]
        theta2=theta[2]
        repr2(theta1,theta2)*cos(theta1)*sin(theta2)
      }

      integrand4 = function(theta) {
        theta1=theta[1]
        theta2=theta[2]
        repr2(theta1,theta2)*cos(theta1)*cos(theta2)
      }
      fun=list(integrand1,integrand2,integrand3,integrand4)

      results4=foreach(l=1:4,.packages = c("cubature"), .combine = cbind,
                       .export = c("l1", "l2", "sigma1", "sigma2",
                                   "repr1", "repr2", "integrand1","xtr","ytr",
                                   "integrand2", "integrand3", "integrand4"))%dopar%{
                                     adaptIntegrate(fun[[l]], lower=c(0,0),
                                                    upper=c(2 * pi,2*pi),
                                                    maxEval = maxEval)$integral
                                   }
 K_l1[i, j] = results1[1]
      K_l1[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i,
             ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results1[4]
      K_l1[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i, j] = results1[3]
      K_l1[i,ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results1[2]

      K_sigma1[i, j] = results2[1]
      K_sigma1[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i,
                 ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results2[4]
      K_sigma1[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i, j] = results2[3]
      K_sigma1[i,ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results2[2]

      K_l2[i, j] = results3[1]
      K_l2[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i,
             ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results3[4]
      K_l2[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i, j] = results3[3]
      K_l2[i,ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results3[2]

      K_sigma2[i, j] = results4[1]
      K_sigma2[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i,
                 ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results4[4]
      K_sigma2[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i, j] = results4[3]
      K_sigma2[i,ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results4[2]

    }


  }
  inv_K=solve(K)

  grad=0.5*c(-t(c(ytr[,1],ytr[,2]))%*%inv_K%*%K_l1%*%inv_K%*%c(ytr[,1],ytr[,2])+Trace(inv_K%*%K_l1),
             -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%K_sigma1%*%inv_K%*%c(ytr[,1],ytr[,2])+Trace(inv_K%*%K_sigma1),
             -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%K_l2%*%inv_K%*%(c(ytr[,1],ytr[,2]))+Trace(inv_K%*%K_l2),
             -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%K_sigma2%*%inv_K%*%(c(ytr[,1],ytr[,2]))+Trace(inv_K%*%K_sigma2),
             -2*sigma_obs*t(c(ytr[,1],ytr[,2]))%*%inv_K%*%inv_K%*%c(ytr[,1],ytr[,2])+Trace(2*sigma_obs*inv_K))

  return(grad)
}

Gradient check

Ytr=matrix(runif(20),ncol=2);Xtr=matrix(runif(20),ncol=2)

nloptr(runif(5),function(x) {
  log_likelihood(
    Ytr, Xtr, x[1], x[2], x[3], x[4], x[5])
},function(x) {
  grad_standard(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5])
},opts=list(algorithm="NLOPT_LD_LBFGS",check_derivatives=TRUE))
## Checking gradients of objective function.
## Derivative checker results: 0 error(s) detected.
## 
##   eval_grad_f[ 1 ] = -4.873039e+00 ~ -4.873039e+00   [7.636513e-08]
##   eval_grad_f[ 2 ] =  3.232570e-01 ~  3.232570e-01   [2.717994e-08]
##   eval_grad_f[ 3 ] =  2.702668e+01 ~  2.702668e+01   [2.198297e-09]
##   eval_grad_f[ 4 ] =  5.532766e-01 ~  5.532767e-01   [1.935148e-07]
##   eval_grad_f[ 5 ] =  7.651816e-02 ~  7.651818e-02   [2.981544e-07]
## 
## Call:
## nloptr(x0 = runif(5), eval_f = function(x) {
##     log_likelihood(Ytr, Xtr, x[1], x[2], x[3], x[4], x[5])
## }, eval_grad_f = function(x) {
##     grad_standard(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5])
## }, opts = list(algorithm = "NLOPT_LD_LBFGS", check_derivatives = TRUE))
## 
## 
## Minimization using NLopt version 2.7.1 
## 
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because 
## xtol_rel or xtol_abs (above) was reached. )
## 
## Number of Iterations....: 29 
## Termination conditions:  relative x-tolerance = 1e-04 (DEFAULT) 
## Number of inequality constraints:  0 
## Number of equality constraints:    0 
## Optimal value of objective function:  2.80406892511985 
## Optimal value of controls: 0.6952707 0.4647874 0.1985335 0.3078215 1.204242
nloptr(runif(5),function(x) {
  log_likelihood_helm(
    Ytr, Xtr, x[1], x[2], x[3], x[4], x[5])
},function(x) {
  grad_helm(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5])
},opts=list(algorithm="NLOPT_LD_LBFGS",check_derivatives=TRUE))
## Checking gradients of objective function.
## Derivative checker results: 0 error(s) detected.
## 
##   eval_grad_f[ 1 ] = -8.217162e+01 ~ -8.217162e+01   [4.954212e-08]
##   eval_grad_f[ 2 ] =  1.563809e+01 ~  1.563809e+01   [1.009482e-08]
##   eval_grad_f[ 3 ] = -3.902900e+01 ~ -3.902900e+01   [5.403983e-08]
##   eval_grad_f[ 4 ] =  1.341967e+01 ~  1.341967e+01   [6.246923e-08]
##   eval_grad_f[ 5 ] =  4.228628e+00 ~  4.228627e+00   [1.952183e-07]
## 
## Call:
## nloptr(x0 = runif(5), eval_f = function(x) {
##     log_likelihood_helm(Ytr, Xtr, x[1], x[2], x[3], x[4], x[5])
## }, eval_grad_f = function(x) {
##     grad_helm(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5])
## }, opts = list(algorithm = "NLOPT_LD_LBFGS", check_derivatives = TRUE))
## 
## 
## Minimization using NLopt version 2.7.1 
## 
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because 
## xtol_rel or xtol_abs (above) was reached. )
## 
## Number of Iterations....: 26 
## Termination conditions:  relative x-tolerance = 1e-04 (DEFAULT) 
## Number of inequality constraints:  0 
## Number of equality constraints:    0 
## Optimal value of objective function:  4.77788344246936 
## Optimal value of controls: 27.46812 -5.234488 12.23421 -7.190854 -0.2488243
nloptr(runif(5),function(x) {
  log_likelihood_fund(
    Ytr, Xtr, x[1], x[2], x[3], x[4], x[5])
},function(x) {
  grad_fund(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5])
},opts=list(algorithm="NLOPT_LD_LBFGS",check_derivatives=TRUE))
## Checking gradients of objective function.
## Derivative checker results: 0 error(s) detected.
## 
##   eval_grad_f[ 1 ] = 2.145099e-02 ~ 2.145123e-02   [1.134500e-05]
##   eval_grad_f[ 2 ] = 1.359441e+00 ~ 1.359441e+00   [6.369021e-08]
##   eval_grad_f[ 3 ] = 1.624924e+01 ~ 1.624924e+01   [2.517955e-09]
##   eval_grad_f[ 4 ] = 6.915565e-02 ~ 6.915593e-02   [4.044863e-06]
##   eval_grad_f[ 5 ] = 1.311659e-02 ~ 1.311660e-02   [6.984737e-07]
## 
## Call:
## nloptr(x0 = runif(5), eval_f = function(x) {
##     log_likelihood_fund(Ytr, Xtr, x[1], x[2], x[3], x[4], x[5])
## }, eval_grad_f = function(x) {
##     grad_fund(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5])
## }, opts = list(algorithm = "NLOPT_LD_LBFGS", check_derivatives = TRUE))
## 
## 
## Minimization using NLopt version 2.7.1 
## 
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because 
## xtol_rel or xtol_abs (above) was reached. )
## 
## Number of Iterations....: 43 
## Termination conditions:  relative x-tolerance = 1e-04 (DEFAULT) 
## Number of inequality constraints:  0 
## Number of equality constraints:    0 
## Optimal value of objective function:  10.2131759998555 
## Optimal value of controls: 0.6960769 -0.1567802 0.3494124 0.4994047 3.511851
nloptr(runif(5),function(x) {
  log_likelihood_eq(
    Ytr[1:2,], Xtr[1:2,], x[1], x[2], x[3], x[4], x[5],maxEval = 2000)
},function(x) {
  grad_eq(Xtr[1:2,], Ytr[1:2,], x[1], x[2], x[3], x[4], x[5],maxEval = 2000)
},opts=list(algorithm="NLOPT_LD_LBFGS",check_derivatives=TRUE))
## Checking gradients of objective function.
## Derivative checker results: 4 error(s) detected.
## 
## * eval_grad_f[ 1 ] = -2.633250e-02 ~ -3.059232e-02   [1.392450e-01]
## * eval_grad_f[ 2 ] = -6.619398e-03 ~ -1.085865e-02   [3.904035e-01]
## * eval_grad_f[ 3 ] = -2.281670e+00 ~ -2.285962e+00   [1.877406e-03]
## * eval_grad_f[ 4 ] = -8.346950e+00 ~ -8.355288e+00   [9.978772e-04]
##   eval_grad_f[ 5 ] = -4.018503e+00 ~ -4.018503e+00   [8.912987e-08]
## 
## Call:
## nloptr(x0 = runif(5), eval_f = function(x) {
##     log_likelihood_eq(Ytr[1:2, ], Xtr[1:2, ], x[1], x[2], x[3], 
##         x[4], x[5], maxEval = 2000)}, eval_grad_f = function(x) {
##     grad_eq(Xtr[1:2, ], Ytr[1:2, ], x[1], x[2], x[3], x[4], x[5], 
##         maxEval = 2000)
## }, opts = list(algorithm = "NLOPT_LD_LBFGS", check_derivatives = TRUE))
## 
## 
## Minimization using NLopt version 2.7.1 
## 
## NLopt solver status: 5 ( NLOPT_MAXEVAL_REACHED: Optimization stopped because 
## maxeval (above) was reached. )
## 
## Number of Iterations....: 100 
## Termination conditions:  relative x-tolerance = 1e-04 (DEFAULT) 
## Number of inequality constraints:  0 
## Number of equality constraints:    0 
## Current value of objective function:  2.42730721004063 
## Current value of controls: 0.02595316 0.07133829 0.0800657 2.617406 -0.2137168

Adam optimizer

Adam = function(f, grad_f, params_init, lr = 0.01, beta1 = 0.9, beta2 = 0.999,
                epsilon = 1e-8, max_iter = 100, tol = 1e-10,
                lb = rep(0, length(params_init)),
                ub = rep(Inf, length(params_init))) {
  
  # Initialize progress bar if not in cluster
  if (notcluster) {
    pb = progress_bar$new(
      format = "  Progress [:bar] :percent in :elapsed",
      total = max_iter,
      clear = FALSE,
      width = 60
    )
  }
  
  params = params_init
  m = numeric(length(params))
  v = numeric(length(params))
  iter = 0
  
  while (iter < max_iter) {
    iter = iter + 1
    
    # Compute gradient
    grad = grad_f(params)
    
    # Update moments
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * (grad^2)
    
    # Compute bias-corrected moments
    m_hat = m / (1 - beta1^iter)
    v_hat = v / (1 - beta2^iter)
    
    # Update parameters
    params_new = params - lr * m_hat / (sqrt(v_hat) + epsilon)
    
    # Handle boundary constraints
    params_new[is.na(params_new)] = params[is.na(params_new)]
    params_new[params_new <= lb] = params[params_new <= lb]
    params_new[params_new > ub] = params[params_new > ub]
    
    # Check convergence
    if (max(abs(params_new - params)) < tol) {
      break
    }
    
    params = params_new
    
    
    if (notcluster) {
      pb$tick()
    }
  }
  
  return(params)
}

First vector field

initial_par=rep(1,4)
initial_sigma_obs=0.1

set.seed(12)
Xtr=cbind(
  -c(1.45,1.1,1.5,1.5,1.28,1.15,1.53,1.27)/1.6,
  1/1.2*c(1.13,1,0.88,0.32,0.02,-0.45,-0.88,-1.07)
)
sigma_obs=0.15
set.seed(12)
noise=rnorm(16,0,sigma_obs)
Ytr=cbind(-Xtr[,2],Xtr[,1])+cbind(noise[1:8],noise[9:16])



ngrid=17

ny=seq(-1,1,l=ngrid)
nx=seq(-1,1,l=ngrid)

Xte=expand.grid(nx,ny)
Yte=cbind(-Xte[,2],Xte[,1])

Xte1=Xte;Yte1=Yte;Xtr1=Xtr;Ytr1=Ytr
plot(rbind(Xtr,as.matrix(Xte)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main = "Vector field 1")
points(Xtr,col=2,pch=19)
quiver(Xtr[,1], Xtr[,2], Ytr[,1 ],Ytr[,2],scale=.2, col = 2)
quiver(Xte[,1], Xte[,2], Yte[,1 ],Yte[,2],scale=.2, col = 1)

Fitting of the GP

initial_sigma_obs=.1
initial_sigma1=initial_sigma2=initial_l1=initial_l2=1

opt_par=Adam(function(x) {
  log_likelihood(
    Ytr, Xtr, x[1], x[2], x[3], x[4], x[5])
},function(x) {
  grad_standard(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5])
},c(initial_sigma1,
    initial_sigma2,
    initial_sigma_obs,
    initial_l1,
    initial_l2),lr=lr,max_iter = max_iter)

Ktetr_standard=cov_mat(Xte,Xtr,opt_par[1],
                       opt_par[2],opt_par[4],
                       opt_par[5])

Ktetr_trtr_inv_standard=Ktetr_standard%*%inv(cov_mat(Xtr,Xtr,opt_par[1],
                                                     opt_par[2], opt_par[4],
                                                     opt_par[5])
                                             
                                             +diag(opt_par[3]^2,nrow=2*nrow(Xtr)))

posterior_mean_standard1=Ktetr_trtr_inv_standard%*%c(Ytr[,1],Ytr[,2])



rmse_standard1=mean(apply((cbind(
  posterior_mean_standard1[1:(0.5*length(posterior_mean_standard1))],
  posterior_mean_standard1
  [(0.5*length(posterior_mean_standard1)+1):(length(posterior_mean_standard1))])
  -Yte)^2,1,sum))^.5



posterior_cov_standard1= cov_mat(Xte,Xte,opt_par[1],
                                opt_par[2],opt_par[4],opt_par[5])-Ktetr_trtr_inv_standard%*%t(Ktetr_standard)


LogS_standard1=(t(c(Yte[,1],Yte[,2])-posterior_mean_standard1)%*%
                  inv(posterior_cov_standard1+diag(opt_par[3]^2,2*nrow(Xte)))%*%
                  (c(Yte[,1],Yte[,2])-posterior_mean_standard1)
                +determinant(posterior_cov_standard1+diag(opt_par[3]^2,2*nrow(Xte))
                )$modulus[1]+
                  log(2*pi)*nrow(Xte))/nrow(Xte)



opt_par=Adam(function(x) {
  log_likelihood_helm(
    Ytr, Xtr, x[1], x[2], x[3], x[4], x[5])
},function(x) {
  grad_helm(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5])
},c(initial_par[1:4],initial_sigma_obs),lr=lr,max_iter=max_iter)^.5

Ktetr_helm=cov_mat_helm(Xte,Xtr,opt_par[1],
                        opt_par[2],
                        opt_par[3],
                        opt_par[4])

Ktetr_trtr_inv_helm=Ktetr_helm%*%inv(cov_mat_helm(Xtr,Xtr,opt_par[1],
                                                  opt_par[2],
                                                  opt_par[3],
                                                  opt_par[4])+diag(opt_par[5]^2,nrow=2*nrow(Xtr)))

posterior_mean_helm1=Ktetr_trtr_inv_helm%*%c(Ytr[,1],Ytr[,2])



rmse_helm1=mean(apply((cbind(posterior_mean_helm1[1:(0.5*length(posterior_mean_helm1))],
                             posterior_mean_helm1[(0.5*length(posterior_mean_helm1)+1):
                                                   (length(posterior_mean_helm1))])
                       -Yte)^2,1,sum))^.5

posterior_cov_helm1= cov_mat_helm(Xte,Xte,opt_par[1],
                                 opt_par[2],
                                 opt_par[3],
                                 opt_par[4])-Ktetr_trtr_inv_helm%*%t(Ktetr_helm)


LogS_helm1=(t(c(Yte[,1],Yte[,2])-posterior_mean_helm1)%*%
              inv(posterior_cov_helm1+diag(opt_par[5]^2,2*nrow(Xte)))%*%
              (c(Yte[,1],Yte[,2])-posterior_mean_helm1)
            +determinant(posterior_cov_helm1+diag(opt_par[5]^2,2*nrow(Xte)))$modulus[1]+
              log(2*pi)*nrow(Xte))/nrow(Xte)



opt_par=Adam(function(x) {
  log_likelihood_fund(
    Ytr, Xtr, x[1], x[2], x[3], x[4], x[5])
},function(x) {
  grad_fund(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5])
},c(initial_sigma1,
    initial_sigma2,  initial_sigma_obs,
    initial_l1,
    initial_l2),lr=lr,max_iter = max_iter)

Ktetr_fund=fund_cov_mat(Xte,Xtr,opt_par[1],
                            opt_par[2],opt_par[4],
                            opt_par[5])

Ktetr_trtr_inv_fund=Ktetr_fund%*%inv(fund_cov_mat(Xtr,Xtr,opt_par[1],
                                                      opt_par[2],opt_par[4],
                                                      opt_par[5])+diag(opt_par[3]^2,nrow=2*nrow(Xtr)))

posterior_mean_fund1=Ktetr_trtr_inv_fund%*%c(Ytr[,1],Ytr[,2])



rmse_fund1=mean(apply((cbind(
  posterior_mean_fund1[1:(0.5*length(posterior_mean_fund1))],
  posterior_mean_fund1
  [(0.5*length(posterior_mean_fund1)+1):(length(posterior_mean_fund1))])
  -Yte)^2,1,sum))^.5



posterior_cov_fund1= fund_cov_mat(Xte,Xte,opt_par[1],
                                     opt_par[2], opt_par[4],
                                     opt_par[5])-Ktetr_trtr_inv_fund%*%t(Ktetr_fund)


LogS_fund1=(t(c(Yte[,1],Yte[,2])-posterior_mean_fund1)%*%
              inv(posterior_cov_fund1+diag(opt_par[3]^2,2*nrow(Xte)))%*%
              (c(Yte[,1],Yte[,2])-posterior_mean_fund1)
            +determinant(posterior_cov_fund1+diag(opt_par[3]^2,2*nrow(Xte))
            )$modulus[1]+
              log(2*pi)*nrow(Xte))/nrow(Xte)

# We skip the computation for the double integration kernel for time reasons here.
load("~/Documents/Equivariant_results_first_examples_1.rda")

posterior_mean_eq1=res[[1]]

rmse_eq1=mean(apply((cbind(
  posterior_mean_eq1[1:(0.5*length(posterior_mean_eq1))],
  posterior_mean_eq1
  [(0.5*length(posterior_mean_eq1)+1):(length(posterior_mean_eq1))])
  -Yte)^2,1,sum))^.5


opt_par=res[[3]]
posterior_cov_eq1=res[[2]]
posterior_mean_eq1=c(posterior_mean_eq1[,1],posterior_mean_eq1[,2])
LogS_eq1=(t(c(Yte[,1],Yte[,2])-posterior_mean_eq1)%*%
            inv(posterior_cov_eq1+diag(opt_par[5]^2,2*nrow(Xte)))%*%
            (c(Yte[,1],Yte[,2])-posterior_mean_eq1)
          +determinant(posterior_cov_eq1+diag(opt_par[5]^2,2*nrow(Xte))
          )$modulus[1]+
            log(2*pi)*nrow(Xte))/nrow(Xte)




svd_standard1=svd(posterior_cov_standard1)
sqrt_standard1=svd_standard1$u%*%sqrt(diag(svd_standard1$d))%*%t(svd_standard1$v)

svd_fund1=svd(posterior_cov_fund1)
sqrt_fund1=svd_fund1$u%*%sqrt(diag(svd_fund1$d))%*%t(svd_fund1$v)


svd_eq1=svd(posterior_cov_eq1)
sqrt_eq1=svd_eq1$u%*%sqrt(diag(svd_eq1$d))%*%t(svd_eq1$v)


svd_helm1=svd(posterior_cov_helm1)
sqrt_helm1=svd_helm1$u%*%sqrt(diag(svd_helm1$d))%*%t(svd_helm1$v)

set.seed(12)
I=rnorm(2*nrow(Xte))


sim_standard1=posterior_mean_standard1+sqrt_standard1%*%I
sim_standard1=cbind(sim_standard1[1:(0.5*length(posterior_mean_standard1))],
                   sim_standard1[(0.5*length(posterior_mean_standard1)+1):(length(posterior_mean_standard1))])

sim_fund1=posterior_mean_fund1+sqrt_fund1%*%I
sim_fund1=cbind(sim_fund1[1:(0.5*length(posterior_mean_fund1))],
               sim_fund1[(0.5*length(posterior_mean_fund1)+1):(length(posterior_mean_fund1))])

sim_eq1=posterior_mean_eq1+sqrt_eq1%*%I
sim_eq1=cbind(sim_eq1[1:(0.5*length(posterior_mean_eq1))],
             sim_eq1[(0.5*length(posterior_mean_eq1)+1):(length(posterior_mean_eq1))])


sim_helm1=posterior_mean_helm1+sqrt_helm1%*%I
sim_helm1=cbind(sim_helm1[1:(0.5*length(posterior_mean_helm1))],
               sim_helm1[(0.5*length(posterior_mean_helm1)+1):(length(posterior_mean_helm1))])

Second vector field

set.seed(123)



bd=0.5
nx=seq(-2,2,l=20)
ny=nx

Xte=expand.grid(nx,ny)

Yte=cbind(Xte[,1]/(bd+(Xte[,1]^2+Xte[,2]^2)^2),Xte[,2]/(bd+(Xte[,1]^2+Xte[,2]^2)^2))


#Xtr=cbind(runif(10,-2,2),runif(10,-2,2))
Xtr=cbind(
  c(-0.35,0.88,-0.06,0.105,0.27,1.35,-0.03,-0.015,-0.3,-0.11)*2/1.4,
  2*c(0.95,0.63,0.2,0.065,0,0,-0.095,-0.2,-0.85,-0.95)
)

Ytr=cbind(Xtr[,1]/(bd+(Xtr[,1]^2+Xtr[,2]^2)^2),Xtr[,2]/(bd+(Xtr[,1]^2+Xtr[,2]^2)^2))

sigma_obs=.1
noise=rnorm(length(Xtr),0,sigma_obs)
Ytr=Ytr+cbind(noise[1:(length(Xtr)/2)],noise[(length(Xtr)/2+1):length(Xtr)])

plot(rbind(Xtr,as.matrix(Xte)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main = "Vector field 2")
points(Xtr,col=2,pch=19)
quiver(Xtr[,1], Xtr[,2], Ytr[,1 ],Ytr[,2],scale=.4, col = 2)
quiver(Xte[,1], Xte[,2], Yte[,1 ],Yte[,2],scale=.4, col = 1)

opt_par=Adam(function(x) {
  log_likelihood(
    Ytr, Xtr, x[1], x[2], x[3], x[4], x[5])
},function(x) {
  grad_standard(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5])
},c(initial_sigma1,
    initial_sigma2,
    initial_sigma_obs,
    initial_l1,
    initial_l2),lr=lr,max_iter = max_iter)

Ktetr_standard=cov_mat(Xte,Xtr,opt_par[1],
                       opt_par[2],
                       opt_par[4],
                       opt_par[5])

Ktetr_trtr_inv_standard=Ktetr_standard%*%inv(cov_mat(Xtr,Xtr,opt_par[1],
                                                     opt_par[2],
                                                     opt_par[4],
                                                     opt_par[5])+diag(opt_par[3]^2,nrow=2*nrow(Xtr)))

posterior_mean_standard=Ktetr_trtr_inv_standard%*%c(Ytr[,1],Ytr[,2])



rmse_standard=mean(apply((cbind(
  posterior_mean_standard[1:(0.5*length(posterior_mean_standard))],
  posterior_mean_standard
  [(0.5*length(posterior_mean_standard)+1):(length(posterior_mean_standard))])
  -Yte)^2,1,sum))^.5



posterior_cov_standard= cov_mat(Xte,Xte,opt_par[1],
                                opt_par[2],opt_par[4],
                                opt_par[5])-Ktetr_trtr_inv_standard%*%t(Ktetr_standard)


LogS_standard=(t(c(Yte[,1],Yte[,2])-posterior_mean_standard)%*%
                  inv(posterior_cov_standard+diag(opt_par[3]^2,2*nrow(Xte)))%*%
                  (c(Yte[,1],Yte[,2])-posterior_mean_standard)
                +determinant(posterior_cov_standard+diag(opt_par[3]^2,2*nrow(Xte))
                )$modulus[1]+
                  log(2*pi)*nrow(Xte))/nrow(Xte)



opt_par=Adam(function(x) {
  log_likelihood_helm(
    Ytr, Xtr, x[1], x[2], x[3], x[4], x[5])
},function(x) {
  grad_helm(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5])
},c(initial_par[1:4],initial_sigma_obs),lr=lr,max_iter=max_iter)^.5

Ktetr_helm=cov_mat_helm(Xte,Xtr,opt_par[1],
                        opt_par[2],
                        opt_par[3],
                        opt_par[4])

Ktetr_trtr_inv_helm=Ktetr_helm%*%inv(cov_mat_helm(Xtr,Xtr,opt_par[1],
                                                  opt_par[2],
                                                  opt_par[3],
                                                  opt_par[4])+diag(opt_par[5]^2,nrow=2*nrow(Xtr)))

posterior_mean_helm=Ktetr_trtr_inv_helm%*%c(Ytr[,1],Ytr[,2])



rmse_helm=mean(apply((cbind(posterior_mean_helm[1:(0.5*length(posterior_mean_helm))],
                             posterior_mean_helm[(0.5*length(posterior_mean_helm)+1):
                                                   (length(posterior_mean_helm))])
                       -Yte)^2,1,sum))^.5

posterior_cov_helm= cov_mat_helm(Xte,Xte,opt_par[1],
                                 opt_par[2],
                                 opt_par[3],
                                 opt_par[4])-Ktetr_trtr_inv_helm%*%t(Ktetr_helm)


LogS_helm=(t(c(Yte[,1],Yte[,2])-posterior_mean_helm)%*%
              inv(posterior_cov_helm+diag(opt_par[5]^2,2*nrow(Xte)))%*%
              (c(Yte[,1],Yte[,2])-posterior_mean_helm)
            +determinant(posterior_cov_helm+diag(opt_par[5]^2,2*nrow(Xte)))$modulus[1]+
              log(2*pi)*nrow(Xte))/nrow(Xte)



opt_par=Adam(function(x) {
  log_likelihood_fund(
    Ytr, Xtr, x[1], x[2], x[3], x[4], x[5])
},function(x) {
  grad_fund(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5])
},c(initial_sigma1,
    initial_sigma2,
    
    initial_sigma_obs,
    initial_l1,
    initial_l2),lr=lr,max_iter = max_iter)

Ktetr_fund=fund_cov_mat(Xte,Xtr,opt_par[1],
                            opt_par[2],
                            
                            opt_par[4],
                            opt_par[5])

Ktetr_trtr_inv_fund=Ktetr_fund%*%inv(fund_cov_mat(Xtr,Xtr,opt_par[1],
                                                      opt_par[2],opt_par[4],
                                                      opt_par[5]) +diag(opt_par[3]^2,nrow=2*nrow(Xtr)))

posterior_mean_fund=Ktetr_trtr_inv_fund%*%c(Ytr[,1],Ytr[,2])



rmse_fund=mean(apply((cbind(
  posterior_mean_fund[1:(0.5*length(posterior_mean_fund))],
  posterior_mean_fund
  [(0.5*length(posterior_mean_fund)+1):(length(posterior_mean_fund))])
  -Yte)^2,1,sum))^.5



posterior_cov_fund= fund_cov_mat(Xte,Xte,opt_par[1],
                                     opt_par[2], opt_par[4],
                                     opt_par[5])-Ktetr_trtr_inv_fund%*%t(Ktetr_fund)


LogS_fund=(t(c(Yte[,1],Yte[,2])-posterior_mean_fund)%*%
              inv(posterior_cov_fund+diag(opt_par[3]^2,2*nrow(Xte)))%*%
              (c(Yte[,1],Yte[,2])-posterior_mean_fund)
            +determinant(posterior_cov_fund+diag(opt_par[3]^2,2*nrow(Xte))
            )$modulus[1]+
              log(2*pi)*nrow(Xte))/nrow(Xte)

load("~/Documents/Equivariant_results_first_examples_2.rda")

posterior_mean_eq=res[[1]]

rmse_eq=mean(apply((cbind(
  posterior_mean_eq[1:(0.5*length(posterior_mean_eq))],
  posterior_mean_eq
  [(0.5*length(posterior_mean_eq)+1):(length(posterior_mean_eq))])
  -Yte)^2,1,sum))^.5


opt_par=res[[3]]
posterior_cov_eq=res[[2]]
posterior_mean_eq=c(posterior_mean_eq[,1],posterior_mean_eq[,2])
LogS_eq=(t(c(Yte[,1],Yte[,2])-posterior_mean_eq)%*%
            inv(posterior_cov_eq+diag(opt_par[5]^2,2*nrow(Xte)))%*%
            (c(Yte[,1],Yte[,2])-posterior_mean_eq)
          +determinant(posterior_cov_eq+diag(opt_par[5]^2,2*nrow(Xte))
          )$modulus[1]+
            log(2*pi)*nrow(Xte))/nrow(Xte)


svd_standard=svd(posterior_cov_standard)
sqrt_standard=svd_standard$u%*%sqrt(diag(svd_standard$d))%*%t(svd_standard$v)

svd_fund=svd(posterior_cov_fund)
sqrt_fund=svd_fund$u%*%sqrt(diag(svd_fund$d))%*%t(svd_fund$v)


svd_eq=svd(posterior_cov_eq)
sqrt_eq=svd_eq$u%*%sqrt(diag(svd_eq$d))%*%t(svd_eq$v)


svd_helm=svd(posterior_cov_helm)
sqrt_helm=svd_helm$u%*%sqrt(diag(svd_helm$d))%*%t(svd_helm$v)

set.seed(123)
I=rnorm(2*nrow(Xte))


sim_standard=posterior_mean_standard+sqrt_standard%*%I
sim_standard=cbind(sim_standard[1:(0.5*length(posterior_mean_standard))],
                   sim_standard[(0.5*length(posterior_mean_standard)+1):(length(posterior_mean_standard))])

sim_fund=posterior_mean_fund+sqrt_fund%*%I
sim_fund=cbind(sim_fund[1:(0.5*length(posterior_mean_fund))],
               sim_fund[(0.5*length(posterior_mean_fund)+1):(length(posterior_mean_fund))])

sim_eq=posterior_mean_eq+sqrt_eq%*%I
sim_eq=cbind(sim_eq[1:(0.5*length(posterior_mean_eq))],
             sim_eq[(0.5*length(posterior_mean_eq)+1):(length(posterior_mean_eq))])


sim_helm=posterior_mean_helm+sqrt_helm%*%I
sim_helm=cbind(sim_helm[1:(0.5*length(posterior_mean_helm))],
               sim_helm[(0.5*length(posterior_mean_helm)+1):(length(posterior_mean_helm))])

Posterior means

par(mfrow=c(2,5),mar=c(2,1,2,1))

plot(rbind(Xtr1,as.matrix(Xte1)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main = "Vector field 1")
points(Xtr1,col=2,pch=19)
quiver(Xtr1[,1], Xtr1[,2], Ytr1[,1 ],Ytr1[,2],scale=.2, col = 2)
quiver(Xte1[,1], Xte1[,2], Yte1[,1 ],Yte1[,2],scale=.2, col = 1)

if(ncol(posterior_mean_standard1)==2){
  data=posterior_mean_standard1
}else{data=cbind(posterior_mean_standard1[1:(0.5*length(posterior_mean_standard1))],
                 posterior_mean_standard1[(0.5*length(posterior_mean_standard1)+1):(length(posterior_mean_standard1))])}

plot(rbind(Xtr1,as.matrix(Xte1)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main=paste("Squared exponential\nRMSE=",round(rmse_standard1,3)))
points(Xtr1,col=2,pch=19)
quiver(Xtr1[,1], Xtr1[,2], Ytr1[,1 ],Ytr1[,2],scale=.2, col = 2)
quiver(Xte1[,1], Xte1[,2], data[,1 ],data[,2],scale=.2, col = 4)

if(ncol(posterior_mean_helm1)==2){
  data=posterior_mean_helm1
}else{data=cbind(posterior_mean_helm1[1:(0.5*length(posterior_mean_helm1))],
                 posterior_mean_helm1[(0.5*length(posterior_mean_helm1)+1):(length(posterior_mean_helm1))])}

plot(rbind(Xtr1,as.matrix(Xte1)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main=paste("Helmholtz\nRMSE=",round(rmse_helm1,3)))
points(Xtr1,col=2, pch=19)
quiver(Xtr1[,1], Xtr1[,2], Ytr1[,1 ],Ytr1[,2],scale=.2, col = 2)
quiver(Xte1[,1], Xte1[,2], data[,1 ],data[,2],scale=.2, col = 4)






if(ncol(posterior_mean_fund1)==2){
  data=posterior_mean_fund1
}else{data=cbind(posterior_mean_fund1[1:(0.5*length(posterior_mean_fund1))],
                 posterior_mean_fund1[(0.5*length(posterior_mean_fund1)+1):(length(posterior_mean_fund1))])}


plot(rbind(Xtr1,as.matrix(Xte1)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main=paste("Fundamental\nRMSE=",round(rmse_fund1,3)))
points(Xtr1,col=2, pch=19)
quiver(Xtr1[,1], Xtr1[,2], Ytr1[,1 ],Ytr1[,2],scale=.2, col = 2)
quiver(Xte1[,1], Xte1[,2], data[,1 ],data[,2],scale=.2, col = 4)



data=cbind(posterior_mean_eq1[1:(0.5*length(posterior_mean_eq1))],
           posterior_mean_eq1[(0.5*length(posterior_mean_eq1)+1):(length(posterior_mean_eq1))])


plot(rbind(Xtr1,as.matrix(Xte1)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main=paste("Double integration\nRMSE=",round(rmse_eq1,3)))
points(Xtr1,col=2, pch=19)
quiver(Xtr1[,1], Xtr1[,2], Ytr1[,1 ],Ytr1[,2],scale=.2, col = 2)
quiver(Xte1[,1], Xte1[,2], data[,1 ],data[,2],scale=.2, col = 4)



plot(rbind(Xtr,as.matrix(Xte)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main = "Vector field 2")
points(Xtr,col=2,pch=19)
quiver(Xtr[,1], Xtr[,2], Ytr[,1 ],Ytr[,2],scale=.4, col = 2)
quiver(Xte[,1], Xte[,2], Yte[,1 ],Yte[,2],scale=.4, col = 1)

if(ncol(posterior_mean_standard)==2){
  data=posterior_mean_standard
}else{data=cbind(posterior_mean_standard[1:(0.5*length(posterior_mean_standard))],
                 posterior_mean_standard[(0.5*length(posterior_mean_standard)+1):(length(posterior_mean_standard))])}

plot(rbind(Xtr,as.matrix(Xte)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main=paste("Squared exponential\nRMSE=",round(rmse_standard,3)))
points(Xtr,col=2,pch=19)
quiver(Xtr[,1], Xtr[,2], Ytr[,1 ],Ytr[,2],scale=.4, col = 2)
quiver(Xte[,1], Xte[,2], data[,1 ],data[,2],scale=.4, col = 4)

if(ncol(posterior_mean_helm)==2){
  data=posterior_mean_helm
}else{data=cbind(posterior_mean_helm[1:(0.5*length(posterior_mean_helm))],
                 posterior_mean_helm[(0.5*length(posterior_mean_helm)+1):(length(posterior_mean_helm))])}

plot(rbind(Xtr,as.matrix(Xte)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main=paste("Helmholtz\nRMSE=",round(rmse_helm,3)))
points(Xtr,col=2, pch=19)
quiver(Xtr[,1], Xtr[,2], Ytr[,1 ],Ytr[,2],scale=.4, col = 2)
quiver(Xte[,1], Xte[,2], data[,1 ],data[,2],scale=.4, col = 4)






if(ncol(posterior_mean_fund)==2){
  data=posterior_mean_fund
}else{data=cbind(posterior_mean_fund[1:(0.5*length(posterior_mean_fund))],
                 posterior_mean_fund[(0.5*length(posterior_mean_fund)+1):(length(posterior_mean_fund))])}


plot(rbind(Xtr,as.matrix(Xte)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main=paste("Fundamental\nRMSE=",round(rmse_fund,3)))
points(Xtr,col=2, pch=19)
quiver(Xtr[,1], Xtr[,2], Ytr[,1 ],Ytr[,2],scale=.4, col = 2)
quiver(Xte[,1], Xte[,2], data[,1 ],data[,2],scale=.4, col = 4)



data=cbind(posterior_mean_eq[1:(0.5*length(posterior_mean_eq))],
           posterior_mean_eq[(0.5*length(posterior_mean_eq)+1):(length(posterior_mean_eq))])


plot(rbind(Xtr,as.matrix(Xte)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main=paste("Double integration\nRMSE=",round(rmse_eq,3)))
points(Xtr,col=2, pch=19)
quiver(Xtr[,1], Xtr[,2], Ytr[,1 ],Ytr[,2],scale=.4, col = 2)
quiver(Xte[,1], Xte[,2], data[,1 ],data[,2],scale=.4, col = 4)

Posterior simulations

par(mar=c(2,1,2,1),mfrow=c(2,4))

data=sim_standard1
plot(rbind(Xtr1,as.matrix(Xte1)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main=paste("Squared exponential\nLogS=",round(LogS_standard1,3)))
points(Xtr1,col=2,pch=19)
quiver(Xtr1[,1], Xtr1[,2], Ytr1[,1 ],Ytr1[,2],scale=.2, col = 2)
quiver(Xte1[,1], Xte1[,2], data[,1 ],data[,2],scale=.2, col = 4)

data=sim_helm1

plot(rbind(Xtr1,as.matrix(Xte1)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main=paste("Helmholtz\nLogS=",round(LogS_helm1,3)))
points(Xtr1,col=2, pch=19)
quiver(Xtr1[,1], Xtr1[,2], Ytr1[,1 ],Ytr1[,2],scale=.2, col = 2)
quiver(Xte1[,1], Xte1[,2], data[,1 ],data[,2],scale=.2, col = 4)



data=sim_fund1

plot(rbind(Xtr1,as.matrix(Xte1)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main=paste("Fundamental\nLogS=",round(LogS_fund1,3)))
points(Xtr1,col=2, pch=19)
quiver(Xtr1[,1], Xtr1[,2], Ytr1[,1 ],Ytr1[,2],scale=.2, col = 2)
quiver(Xte1[,1], Xte1[,2], data[,1 ],data[,2],scale=.2, col = 4)


data=sim_eq1

plot(rbind(Xtr1,as.matrix(Xte1)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",
     ylab="",main=paste("Double integration\nLogS=",round(LogS_eq1,3)))
points(Xtr1,col=2, pch=19,cex=.3)
quiver(Xtr1[,1], Xtr1[,2], Ytr1[,1 ],Ytr1[,2],scale=.2, col = 2)
quiver(Xte1[,1], Xte1[,2], data[,1 ],data[,2],scale=.2, col = 4)


data=sim_standard
plot(rbind(Xtr,as.matrix(Xte)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main=paste("Squared exponential\nLogS=",round(LogS_standard,3)))
points(Xtr,col=2,pch=19)
quiver(Xtr[,1], Xtr[,2], Ytr[,1 ],Ytr[,2],scale=.4, col = 2)
quiver(Xte[,1], Xte[,2], data[,1 ],data[,2],scale=.4, col = 4)

data=sim_helm

plot(rbind(Xtr,as.matrix(Xte)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main=paste("Helmholtz\nLogS=",round(LogS_helm,3)))
points(Xtr,col=2, pch=19)
quiver(Xtr[,1], Xtr[,2], Ytr[,1 ],Ytr[,2],scale=.4, col = 2)
quiver(Xte[,1], Xte[,2], data[,1 ],data[,2],scale=.4, col = 4)



data=sim_fund

plot(rbind(Xtr,as.matrix(Xte)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",ylab="",main=paste("Fundamental\nLogS=",round(LogS_fund,3)))
points(Xtr,col=2, pch=19)
quiver(Xtr[,1], Xtr[,2], Ytr[,1 ],Ytr[,2],scale=.4, col = 2)
quiver(Xte[,1], Xte[,2], data[,1 ],data[,2],scale=.4, col = 4)


data=sim_eq

plot(rbind(Xtr,as.matrix(Xte)),pch=19,cex=.2,xaxt="n",yaxt="n",xlab=" ",
     ylab="",main=paste("Double integration\nLogS=",round(LogS_eq,3)))
points(Xtr,col=2, pch=19,cex=.3)
quiver(Xtr[,1], Xtr[,2], Ytr[,1 ],Ytr[,2],scale=.4, col = 2)
quiver(Xte[,1], Xte[,2], data[,1 ],data[,2],scale=.4, col = 4)

Experiment 2: Ocean velocities with equivariant noise

Data

scale_data=1

ALPHA=seq(0,1,by=.2)

gulf_data_train = read.csv(
  "https://raw.githubusercontent.com/JaxGaussianProcesses/static/main/data/gulfdata_train.csv"
)

xtr=cbind(gulf_data_train$lon,gulf_data_train$lat)
ytr=cbind(gulf_data_train$ubar,gulf_data_train$vbar)

gulf_data_test = read.csv(
  "https://raw.githubusercontent.com/JaxGaussianProcesses/static/main/data/gulfdata_test.csv"
)

xte=cbind(gulf_data_test$lon,gulf_data_test$lat)
yte=cbind(gulf_data_test$ubar,gulf_data_test$vbar)

X=rbind(xtr,xte)
Y=rbind(ytr,yte)


if(scale_data){
  X=scale(rbind(xtr,xte))
  Y=scale(rbind(ytr,yte))
}

center=apply(X,2,function(x){mean(range(x))})
F=function(x){(x-center)/(bd+(norm(x-center)^2))}
Eq_noise=scale(t(apply(X,1,F)))

par(mfrow=c(2,2),mar=rep(2,4))
for(a in c(1,.75,.5,0)){
  Y1=a*Y+(1-a)*Eq_noise
  plot(X,xaxt="n",yaxt="n",xlab=" ",ylab="",
       main=bquote(alpha == .(a)))
  quiver(X[,1], X[,2],Y1[1:564],Y1[565:1128] ,col = 4,scale=.1)
}

Mixture covariance kernels

fund_cov_mat_helm= function(x1, x2, sigma1,sigma2,l1,l2) {
  x1=as.matrix(x1);x2=as.matrix(x2)
  cov = matrix(0, ncol =2 * ifelse(length(as.matrix(x2)) == 2, 1,nrow(x2)),
               nrow = 2 * ifelse(length(as.matrix(x1)) == 2, 1, nrow(x1)))

  for(i in 1:(length(as.matrix(x1))/2)){
    #if(i%%100==0){print(i)}
    theta1=ifelse(length(as.matrix(x1))==2,atan2(x1[2],x1[1]),
                  atan2(x1[i,2],x1[i,1]))
    for (j in 1:(length(as.matrix(x2))/2)){
      r1=ifelse(length(as.matrix(x1))==2,sum((x1)^2)^.5,
                sum((x1[i,])^2)^.5)
      r2=ifelse(length(as.matrix(x2))==2,sum((x2)^2)^.5,
                sum((x2[j,])^2)^.5)
      #dist=abs(x1[i,1]-x2[j,1])+abs(x1[i,2]-x2[j,2])
      theta2=ifelse(length(as.matrix(x2))==2,atan2(x2[2],x2[1]),
                    atan2(x2[j,2],x2[j,1]))

      cov1=matrix(0,2,2)

      cov1[1,1]=sigma1^2/((l1)^4)*exp(-.5*(r1-r2)^2/(l1^2))*(l1^2-(r1-r2)^2)+
        sigma2^2/((l2)^2)*exp(-.5*(r1-r2)^2/(l2^2))

      cov1[2,2]=sigma1^2/((l1)^2)*exp(-.5*(r1-r2)^2/(l1^2))+
        sigma2^2/((l2)^4)*exp(-.5*(r1-r2)^2/(l2^2))*(l2^2-(r1-r2)^2)

      cov1[1,2]=cov1[2,1]=0

      results=cbind(c(cos(theta1),sin(theta1)),c(-sin(theta1),cos(theta1)))%*%
        cov1%*%cbind(c(cos(theta2),-sin(theta2)),
                     c(sin(theta2),cos(theta2)))

      cov[i, j] = results[1,1]
      cov[ifelse(length(as.matrix(x1))==2,1,nrow(x1)) + i,
          ifelse(length(as.matrix(x2))==2,1,nrow(x2)) + j] = results[2,2]
      cov[ifelse(length(as.matrix(x1))==2,1,nrow(x1)) + i, j] = results[2,1]
      cov[i,ifelse(length(as.matrix(x2))==2,1,nrow(x2)) + j] = results[1,2]
    }
  }
  return(cov)
}

mix_cov_mat_helm2=function(x1, x2, sigma1,sigma2,l1,l2,alpha){
  x1=as.matrix(x1);x2=as.matrix(x2)
  K1=fund_cov_mat_helm(x1, x2, sigma1,sigma2,l1,l2)

  K2=cov_mat_helm(x1, x2,l1,sigma1,l2,sigma2)

  (1-alpha)^2*K1+alpha^2*K2
}

mix_cov_mat2=function(x1, x2, sigma1,sigma2,l1,l2,alpha){
  x1=as.matrix(x1);x2=as.matrix(x2)
  K1=fund_cov_mat(x1, x2, sigma1,sigma2,l1,l2)

  K2=cov_mat(x1, x2,sigma1,sigma2,l1,l2)

  (1-alpha)^2*K1+alpha^2*K2
}

Mixture likelihoods

log_likelihood_helm_mix2=function(ytr,xtr,sigma1,sigma2,tau,l1,l2,alpha){
  if(ncol(ytr)==2){
    ytr=c(ytr[,1],ytr[,2])
  }

  Ktr=mix_cov_mat_helm2(xtr,xtr,sigma1,sigma2,l1,l2,alpha)+tau^2*
    diag(nrow=2*nrow(xtr))
  (ll=-.5*((ytr)%*%inv(Ktr)%*%ytr)-.5*determinant(Ktr,logarithm=1)$modulus-
      nrow(xtr)*log(2*pi))
  ll=ifelse(is.na(ll),1e6,-ll)
  #print(ll)
  return(ll)
}

log_likelihood_mixture2=function(ytr,xtr,sigma1,sigma2,tau,l1,l2,alpha){
  if(ncol(ytr)==2){
    ytr=c(ytr[,1],ytr[,2])
  }


  Ktr=mix_cov_mat2(xtr,xtr,sigma1,sigma2,l1,l2,alpha)+
    tau^2*diag(nrow = 2*nrow(xtr))

  ll=-0.5*t(ytr)%*%inv(Ktr)%*%ytr-0.5*determinant(Ktr,logarithm=1)$modulus-
    nrow(xtr)*log(2*pi)
  #print(c(ll))
  ll=ifelse(is.nan(ll),10^6,-ll)
  #ll=ifelse(ll>0,ll,1e6)

  return(ll)


}

Mixture gradients

grad_mix_helm2=function(xtr,ytr,sigma1,sigma2,tau,l1,l2,alpha){
  x1=x2=xtr
  b=(1-alpha)^2
  K1=fund_cov_mat_helm(x1, x2, sigma1,sigma2,l1,l2)

  K2=cov_mat_helm(x1, x2,l1,sigma1,l2,sigma2)

  K=b*K1+alpha^2*K2+
    tau^2*diag(nrow = 2*nrow(xtr))


  n1=ifelse(length(as.matrix(xtr))==2,1,nrow(xtr))
  n2=ifelse(length(as.matrix(xtr))==2,1,nrow(xtr))

  cov = matrix(0, ncol =2 * ifelse(length(as.matrix(x2)) == 2, 1,nrow(x2)),
               nrow = 2 * ifelse(length(as.matrix(x1)) == 2, 1, nrow(x1)))

  del_sigma1=del_sigma2=del_l1=del_l2=cov

  for(i in 1:(length(as.matrix(x1))/2)){
    #if(i%%100==0){print(i)}
    theta1=ifelse(length(as.matrix(x1))==2,atan2(x1[2],x1[1]),
                  atan2(x1[i,2],x1[i,1]))
    for (j in 1:(length(as.matrix(x2))/2)){
      r1=ifelse(length(as.matrix(x1))==2,sum((x1)^2)^.5,
                sum((x1[i,])^2)^.5)
      r2=ifelse(length(as.matrix(x2))==2,sum((x2)^2)^.5,
                sum((x2[j,])^2)^.5)
      #dist=abs(x1[i,1]-x2[j,1])+abs(x1[i,2]-x2[j,2])
      theta2=ifelse(length(as.matrix(x2))==2,atan2(x2[2],x2[1]),
                    atan2(x2[j,2],x2[j,1]))
      cov1=results_sigma1=results_sigma2=results_l1=results_l2=matrix(0,2,2)

      cov1[1,1]=sigma1^2/((l1)^4)*exp(-.5*(r1-r2)^2/(l1^2))*(l1^2-(r1-r2)^2)+
        sigma2^2/((l2)^2)*exp(-.5*(r1-r2)^2/(l2^2))

      cov1[2,2]=sigma1^2/((l1)^2)*exp(-.5*(r1-r2)^2/(l1^2))+sigma2^2/((l2)^4)*
        exp(-.5*(r1-r2)^2/(l2^2))*(l2^2-(r1-r2)^2)

      cov1[1,2]=cov1[2,1]=0

      results=cbind(c(cos(theta1),sin(theta1)),c(-sin(theta1),cos(theta1)))%*%
        cov1%*%cbind(c(cos(theta2),-sin(theta2)),
                     c(sin(theta2),cos(theta2)))

      cov[i, j] = results[1,1]
      cov[ifelse(length(as.matrix(x1))==2,1,nrow(x1)) + i,
          ifelse(length(as.matrix(x2))==2,1,nrow(x2)) + j] = results[2,2]
      cov[ifelse(length(as.matrix(x1))==2,1,nrow(x1)) + i, j] = results[2,1]
      cov[i,ifelse(length(as.matrix(x2))==2,1,nrow(x2)) + j] = results[1,2]
      
      results_l1[1,1]=-4*sigma1^2/((l1)^5)*exp(-.5*(r1-r2)^2/(l1^2))*(l1^2-(r1-r2)^2)+
        sigma1^2/((l1)^4)*exp(-.5*(r1-r2)^2/(l1^2))*((r1-r2)^2/(l1^3)*(l1^2-(r1-r2)^2)+2*l1)
       results_l1[2,2]=sigma1^2/((l1)^2)*exp(-.5*(r1-r2)^2/(l1^2))*(-2/l1+(r1-r2)^2/(l1^3))

      results_l1=cbind(c(cos(theta1),sin(theta1)),c(-sin(theta1),cos(theta1)))%*%
        results_l1%*%
        cbind(c(cos(theta2),-sin(theta2)),c(sin(theta2),cos(theta2)))
      

      results_sigma1[1,1]=2*sigma1/((l1)^4)*exp(-.5*(r1-r2)^2/(l1^2))*(l1^2-(r1-r2)^2)
      results_sigma1[2,2]=2*sigma1/((l1)^2)*exp(-.5*(r1-r2)^2/(l1^2))

      results_sigma1=cbind(c(cos(theta1),sin(theta1)),c(-sin(theta1),cos(theta1)))%*%
        results_sigma1%*%
        cbind(c(cos(theta2),-sin(theta2)),c(sin(theta2),cos(theta2)))

      results_sigma2[1,1]=2*sigma2/((l2)^2)*exp(-.5*(r1-r2)^2/(l2^2))
      results_sigma2[2,2]=2*sigma2/((l2)^4)*exp(-.5*(r1-r2)^2/(l2^2))*(l2^2-(r1-r2)^2)

      results_sigma2=cbind(c(cos(theta1),sin(theta1)),c(-sin(theta1),cos(theta1)))%*%
        results_sigma2%*%
        cbind(c(cos(theta2),-sin(theta2)),c(sin(theta2),cos(theta2)))

      results_l2[1,1]=sigma2^2/((l2)^2)*exp(-.5*(r1-r2)^2/(l2^2))*(-2/l2+(r1-r2)^2/(l2^3))
      
      results_l2[2,2]=sigma2^2/((l2)^4)*exp(-.5*(r1-r2)^2/(l2^2))*
        (-4/l2*(l2^2-(r1-r2)^2)+ (r1-r2)^2/(l2^3)*(l2^2-(r1-r2)^2)+2*l2)

      results_l2=cbind(c(cos(theta1),sin(theta1)),c(-sin(theta1),cos(theta1)))%*%
        results_l2%*%
        cbind(c(cos(theta2),-sin(theta2)),c(sin(theta2),cos(theta2)))

      del_sigma1[i, j] = results_sigma1[1,1]
      del_sigma1[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i,
                 ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_sigma1[2,2]
      del_sigma1[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i, j] = results_sigma1[2,1]
      del_sigma1[i,ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_sigma1[1,2]

      del_sigma2[i, j] = results_sigma2[1,1]
      del_sigma2[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i,
                 ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_sigma2[2,2]
      del_sigma2[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i, j] = results_sigma2[2,1]
      del_sigma2[i,ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_sigma2[1,2]

      del_l1[i, j] = results_l1[1,1]
      del_l1[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i,
             ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_l1[2,2]
      del_l1[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i, j] = results_l1[2,1]
      del_l1[i,ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_l1[1,2]

      del_l2[i, j] = results_l2[1,1]
      del_l2[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i,
             ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_l2[2,2]
      del_l2[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i, j] = results_l2[2,1]
      del_l2[i,ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_l2[1,2]

    }
  }

  dist_mat=distmat(x1,x2)
  diff_mat=outer(x1[, 1], x2[, 1], "-") * outer(x1[, 2], x2[, 2], "-")
  dist_mat_x1=distmat(cbind(x1[,1],0),cbind(x2[,1],0))
  dist_mat_x2=distmat(cbind(x1[,2],0),cbind(x2[,2],0))


  n1=nrow(x1)
  n2=nrow(x2)


  del_cov1=del_cov2=del_cov3=del_cov4=cov=matrix(0,nrow=2*n1,ncol=2*n2)


  del_cov1[1:n1,1:n2]=-4*sigma1^2/((l1)^5)*exp(-.5*dist_mat^2/(l1^2))*(l1^2-dist_mat_x1^2)+
    sigma1^2/((l1)^4)*exp(-.5*dist_mat^2/(l1^2))*((l1^2-dist_mat_x1^2)*dist_mat^2/(l1^3)+2*l1)

  del_cov1[(n1+1):(2*n1),(n2+1):(2*n2)]=-4*sigma1^2/((l1)^5)*exp(-.5*dist_mat^2/(l1^2))*(l1^2-dist_mat_x2^2)+
    sigma1^2/((l1)^4)*exp(-.5*dist_mat^2/(l1^2))*((l1^2-dist_mat_x2^2)*dist_mat^2/(l1^3)+2*l1)

  del_cov1[1:n1,(n2+1):(2*n2)]=del_cov1[(n1+1):(2*n1),1:n2]=
    -diff_mat*exp(-.5*dist_mat^2/(l1^2))*(dist_mat^2/(l1^3)*sigma1^2/((l1)^4)-4*sigma1^2/((l1)^5))

  del_cov2[1:n1,1:n2]=-4*sigma2^2/((l2)^5)*exp(-.5*dist_mat^2/(l2^2))*(l2^2-dist_mat_x2^2)+
    sigma2^2/((l2)^4)*exp(-.5*dist_mat^2/(l2^2))*((l2^2-dist_mat_x2^2)*dist_mat^2/(l2^3)+2*l2)

  del_cov2[(n1+1):(2*n1),(n2+1):(2*n2)]=-4*sigma2^2/((l2)^5)*exp(-.5*dist_mat^2/(l2^2))*(l2^2-dist_mat_x1^2)+
    sigma2^2/((l2)^4)*exp(-.5*dist_mat^2/(l2^2))*((l2^2-dist_mat_x1^2)*dist_mat^2/(l2^3)+2*l2)

  del_cov2[1:n1,(n2+1):(2*n2)]=del_cov2[(n1+1):(2*n1),1:n2]=
    diff_mat*exp(-.5*dist_mat^2/(l2^2))*(dist_mat^2/(l2^3)*sigma2^2/((l2)^4)-4*sigma2^2/((l2)^5))


  del_cov3[1:n1,1:n2]=2*sigma1/((l1)^4)*exp(-.5*dist_mat^2/(l1^2))*(l1^2-dist_mat_x1^2)
  del_cov3[(n1+1):(2*n1),(n2+1):(2*n2)]=2*sigma1/((l1)^4)*exp(-.5*dist_mat^2/(l1^2))*
    (l1^2-dist_mat_x2^2)
  del_cov3[1:n1,(n2+1):(2*n2)]=del_cov3[(n1+1):(2*n1),1:n2]=
    diff_mat*(-exp(-.5*dist_mat^2/(l1^2))*2*sigma1/((l1)^4))

  del_cov4[1:n1,1:n2]=2*sigma2/((l2)^4)*exp(-.5*dist_mat^2/(l2^2))*(l2^2-dist_mat_x2^2)
  del_cov4[(n1+1):(2*n1),(n2+1):(2*n2)]=2*sigma2/((l2)^4)*exp(-.5*dist_mat^2/
                                                                (l2^2))*(l2^2-dist_mat_x1^2)
  del_cov4[1:n1,(n2+1):(2*n2)]=del_cov4[(n1+1):(2*n1),1:n2]=
    diff_mat*(2*sigma2/((l2)^4)*exp(-.5*dist_mat^2/(l2^2)) )


  del_sigma1=b*del_sigma1+alpha^2*del_cov3
  del_sigma2=b*del_sigma2+alpha^2*del_cov4
  del_11=b*del_l1+alpha^2*del_cov1
  del_12=b*del_l2+alpha^2*del_cov2


  inv_K=inv(K)
  del_alpha= 2*(alpha-1)*K1+2*alpha*K2

  grad=.5*c(-t(c(ytr[,1],ytr[,2]))%*%inv_K%*%(del_sigma1)%*%inv_K%*%c(ytr[,1],ytr[,2])+
              Trace(inv_K%*%(del_sigma1)),
            -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%(del_sigma2)%*%inv_K%*%c(ytr[,1],ytr[,2])+
              Trace(inv_K%*%(del_sigma2)),
            -t(c(ytr[,1],ytr[,2]))%*%(2*tau*inv_K)%*%inv_K%*%c(ytr[,1],ytr[,2])+Trace(2*tau*inv_K),
            -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%(del_l1)%*%inv_K%*%(c(ytr[,1],ytr[,2]))+Trace(inv_K%*%(del_l1)),
            -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%(del_l2)%*%inv_K%*%(c(ytr[,1],ytr[,2]))+Trace(inv_K%*%(del_l2)),
            -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%(del_alpha)%*%inv_K%*%(c(ytr[,1],ytr[,2]))+Trace(inv_K%*%(del_alpha))
  )
  #print(grad)
  return(grad)


}

grad_mix2=function(xtr,ytr,sigma1,sigma2,tau,l1,l2,alpha){

  x1=x2=xtr
  K1=fund_cov_mat(x1, x2, sigma1,sigma2,l1,l2)

  K2=cov_mat(x1, x2,sigma1,sigma2,l1,l2)

  sigma_obs_2=tau

  K=(1-alpha)^2*K1+alpha^2*K2+
    diag(sigma_obs_2^2,nrow=2*nrow(xtr))

  n1=ifelse(length(as.matrix(xtr))==2,1,nrow(xtr))
  n2=ifelse(length(as.matrix(xtr))==2,1,nrow(xtr))

  cov = matrix(0, ncol =2 * ifelse(length(as.matrix(x2)) == 2, 1,nrow(x2)),
               nrow = 2 * ifelse(length(as.matrix(x1)) == 2, 1, nrow(x1)))

  del_sigma1=del_sigma2=del_sigma3=del_sigma4=del_l1=del_l2=cov

  for(i in 1:(length(as.matrix(x1))/2)){
    #if(i%%100==0){print(i)}
    theta1=ifelse(length(as.matrix(x1))==2,atan2(x1[2],x1[1]),
                  atan2(x1[i,2],x1[i,1]))
    for (j in 1:(length(as.matrix(x2))/2)){
      r1=ifelse(length(as.matrix(x1))==2,sum((x1)^2)^.5,
                sum((x1[i,])^2)^.5)
      r2=ifelse(length(as.matrix(x2))==2,sum((x2)^2)^.5,
                sum((x2[j,])^2)^.5)
      #dist=abs(x1[i,1]-x2[j,1])+abs(x1[i,2]-x2[j,2])
      theta2=ifelse(length(as.matrix(x2))==2,atan2(x2[2],x2[1]),
                    atan2(x2[j,2],x2[j,1]))

      results_sigma1=cbind(c(cos(theta1),sin(theta1)),c(-sin(theta1),cos(theta1)))%*%
        del_sig1(c(r1,0),c(r2,0),sigma1,sigma2,l1,l2)%*%
        cbind(c(cos(theta2),-sin(theta2)),c(sin(theta2),cos(theta2)))

      results_sigma2=cbind(c(cos(theta1),sin(theta1)),c(-sin(theta1),cos(theta1)))%*%
        del_sig2(c(r1,0),c(r2,0),sigma1,sigma2,l1,l2)%*%
        cbind(c(cos(theta2),-sin(theta2)),c(sin(theta2),cos(theta2)))

      results_l1=cbind(c(cos(theta1),sin(theta1)),c(-sin(theta1),cos(theta1)))%*%
        del_l_1(c(r1,0),c(r2,0),sigma1,sigma2,l1,l2)%*%
        cbind(c(cos(theta2),-sin(theta2)),c(sin(theta2),cos(theta2)))

      results_l2=cbind(c(cos(theta1),sin(theta1)),c(-sin(theta1),cos(theta1)))%*%
        del_l_2(c(r1,0),c(r2,0),sigma1,sigma2,l1,l2)%*%
        cbind(c(cos(theta2),-sin(theta2)),c(sin(theta2),cos(theta2)))

      del_sigma1[i, j] = results_sigma1[1,1]
      del_sigma1[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i,
                 ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_sigma1[2,2]
      del_sigma1[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i, j] = results_sigma1[2,1]
      del_sigma1[i,ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_sigma1[1,2]

      del_sigma2[i, j] = results_sigma2[1,1]
      del_sigma2[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i,
                 ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_sigma2[2,2]
      del_sigma2[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i, j] = results_sigma2[2,1]
      del_sigma2[i,ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_sigma2[1,2]
      
      del_l1[i, j] = results_l1[1,1]
      del_l1[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i,
             ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_l1[2,2]
      del_l1[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i, j] = results_l1[2,1]
      del_l1[i,ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_l1[1,2]

      del_l2[i, j] = results_l2[1,1]
      del_l2[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i,
             ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_l2[2,2]
      del_l2[ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + i, j] = results_l2[2,1]
      del_l2[i,ifelse(length(as.matrix(xtr))==2,1,nrow(xtr)) + j] = results_l2[1,2]

    }
  }


  n1=ifelse(length(as.matrix(xtr))==2,1,nrow(xtr))
  n2=ifelse(length(as.matrix(xtr))==2,1,nrow(xtr))

  if(n1==1){
    dist_mat=distmat(t(xtr),xtr)
  }else{
    dist_mat=distmat(xtr,xtr)
  }
  b=(1-alpha)^2

  del_sigma1_2=matrix(0,nrow=2*n1,ncol=2*n2)
  del_sigma1_2[1:n1,1:n2]=2*sigma1*exp(-.5*dist_mat^2*(l1^2))


  del_sigma2_2=matrix(0,nrow=2*n1,ncol=2*n2)
  del_sigma2_2[(n1+1):(2*n1),(n2+1):(2*n2)]=2*sigma2*exp(-.5*dist_mat^2*(l2^2))



  del_tau=2*tau*diag(nrow=2*nrow(xtr))



  del_l1_2=matrix(0,nrow=2*n1,ncol=2*n2)
  del_l1_2[1:n1,1:n2]=sigma1^2*exp(-.5*dist_mat^2*(l1^2))*(-l1*dist_mat^2)

  del_l2_2=matrix(0,nrow=2*n1,ncol=2*n2)
  del_l2_2[(n1+1):(2*n1),(n2+1):(2*n2)]=sigma2^2*exp(-.5*dist_mat^2*(l2^2))*(-l2*dist_mat^2)

  del_alpha=2*(alpha-1)*K1+2*alpha*K2

  inv_K=inv(K)
  alpha=alpha^2
  grad=.5*c(-t(c(ytr[,1],ytr[,2]))%*%inv_K%*%(b*del_sigma1+alpha*del_sigma1_2)%*%inv_K%*%c(ytr[,1],ytr[,2])+
              Trace(inv_K%*%((b*del_sigma1+alpha*del_sigma1_2))),
            -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%((b*del_sigma2+alpha*del_sigma2_2))%*%inv_K%*%c(ytr[,1],ytr[,2])+
              Trace(inv_K%*%(b*del_sigma2+alpha*del_sigma2_2)),
            -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%del_tau%*%inv_K%*%c(ytr[,1],ytr[,2])+Trace(inv_K%*%del_tau),
            -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%(b*del_l1+alpha*del_l1_2)%*%inv_K%*%(c(ytr[,1],ytr[,2]))+
              Trace(inv_K%*%(b*del_l1+alpha*del_l1_2)),
            -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%(b*del_l2+alpha*del_l2_2)%*%inv_K%*%(c(ytr[,1],ytr[,2]))+
              Trace(inv_K%*%(b*del_l2+alpha*del_l2_2)),
            -t(c(ytr[,1],ytr[,2]))%*%inv_K%*%del_alpha%*%inv_K%*%c(ytr[,1],ytr[,2])+Trace(inv_K%*%del_alpha))
  #print(grad)
  return(grad)

}

Gradient check

Ytr=matrix(runif(20),ncol=2);Xtr=matrix(runif(20),ncol=2)

nloptr(runif(6),function(x) {
  log_likelihood_mixture2(
    Ytr, Xtr, x[1], x[2], x[3], x[4], x[5],x[6])
},function(x) {
  grad_mix2(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5],x[6])
},opts=list(algorithm="NLOPT_LD_LBFGS",check_derivatives=TRUE))
## Checking gradients of objective function.
## Derivative checker results: 0 error(s) detected.
## 
##   eval_grad_f[ 1 ] = -2.530707e+00 ~ -2.530707e+00   [9.069471e-08]
##   eval_grad_f[ 2 ] =  9.000867e-01 ~  9.000866e-01   [1.148235e-07]
##   eval_grad_f[ 3 ] =  1.716147e+01 ~  1.716147e+01   [1.127472e-09]
##   eval_grad_f[ 4 ] =  6.580643e-02 ~  6.580639e-02   [5.757589e-07]
##   eval_grad_f[ 5 ] =  6.063697e-01 ~  6.063695e-01   [2.907534e-07]
##   eval_grad_f[ 6 ] =  7.327573e-01 ~  7.327573e-01   [7.713879e-08]
## 
## Call:
## nloptr(x0 = runif(6), eval_f = function(x) {
##     log_likelihood_mixture2(Ytr, Xtr, x[1], x[2], x[3], x[4], 
##         x[5], x[6])}, eval_grad_f = function(x) {
##     grad_mix2(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5], x[6])
## }, opts = list(algorithm = "NLOPT_LD_LBFGS", check_derivatives = TRUE))
## 
## 
## Minimization using NLopt version 2.7.1 
## 
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because 
## xtol_rel or xtol_abs (above) was reached. )
## 
## Number of Iterations....: 62 
## Termination conditions:  relative x-tolerance = 1e-04 (DEFAULT) 
## Number of inequality constraints:  0 
## Number of equality constraints:    0 
## Optimal value of objective function:  6.9572890860297 
## Optimal value of controls: 0.297003 0.219074 -0.2831727 -6.72925e-09 -2.759944e-09 1.683242
nloptr(runif(6),function(x) {
  log_likelihood_helm_mix2(
    Ytr, Xtr, x[1], x[2], x[3], x[4], x[5],x[6])
},function(x) {
  grad_mix_helm2(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5],x[6])
},opts=list(algorithm="NLOPT_LD_LBFGS",check_derivatives=TRUE))
## Checking gradients of objective function.
## Derivative checker results: 2 error(s) detected.
## 
##   eval_grad_f[ 1 ] =  2.210494e+01 ~  2.210494e+01   [5.043538e-08]
##   eval_grad_f[ 2 ] =  2.714682e+00 ~  2.714681e+00   [2.633572e-07]
##   eval_grad_f[ 3 ] =  2.630667e-01 ~  2.630663e-01   [1.432442e-06]
## * eval_grad_f[ 4 ] = -2.763274e+02 ~ -2.075088e+02   [3.316420e-01]
## * eval_grad_f[ 5 ] = -1.389160e+01 ~ -9.564132e+00   [4.524687e-01]
##   eval_grad_f[ 6 ] =  1.647330e+01 ~  1.647330e+01   [2.432100e-08]
## 
## Call:
## nloptr(x0 = runif(6), eval_f = function(x) {
##     log_likelihood_helm_mix2(Ytr, Xtr, x[1], x[2], x[3], x[4], 
##         x[5], x[6])}, eval_grad_f = function(x) {
##     grad_mix_helm2(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5], x[6])
## }, opts = list(algorithm = "NLOPT_LD_LBFGS", check_derivatives = TRUE))
## 
## 
## Minimization using NLopt version 2.7.1 
## 
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because 
## xtol_rel or xtol_abs (above) was reached. )
## 
## Number of Iterations....: 65 
## Termination conditions:  relative x-tolerance = 1e-04 (DEFAULT) 
## Number of inequality constraints:  0 
## Number of equality constraints:    0 
## Optimal value of objective function:  7.04462600854221 
## Optimal value of controls: -9.298528 3.786178 0.2850047 254.6892 13.39548 1.545982

Example: Recovery of \(\alpha\)

ALPHA=seq(0,1,l=6)
opt_alpha=opt_alpha_helm=ALPHA
initial_alpha=.5
set.seed(1)
train_ind=sample(564,100) 
test_ind=-train_ind

if(QUICK_RUN){train_ind=sample(564,20);max_iter=20}


for(i in 1:length(ALPHA)){
  Xtr=X[train_ind,]
  Ytr=Y[train_ind,];F_tr=Eq_noise[train_ind,]

  Xte=X[test_ind,]
  Yte=Y[test_ind,];F_te=Eq_noise[test_ind,]

  Ytr=ALPHA[i]*Ytr+(1-ALPHA[i])*F_tr
  Yte=ALPHA[i]*Yte+(1-ALPHA[i])*F_te

  opt_par=Adam(function(x) {
    log_likelihood_mixture2(
      Ytr, Xtr, x[1], x[2], x[3], x[4], x[5], x[6])
  },function(x) {
    grad_mix2(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5], x[6])
  },
  c(initial_sigma1,
    initial_sigma2,
    initial_sigma_obs,
    initial_l1,
    initial_l2,
    initial_alpha),
  lr=lr,max_iter=max_iter,
  lb=c(rep(0.1,5),0.01),ub=c(rep(Inf,5),0.99))

  opt_alpha[i]=opt_par[6]



  opt_par=Adam(function(x) {
    log_likelihood_helm_mix2(
      Ytr, Xtr, x[1], x[2], x[3], x[4], x[5], x[6])
  },function(x) {
    grad_mix_helm2(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5], x[6])
  },
  c(initial_sigma1,
    initial_sigma2,
    initial_sigma_obs,
    initial_l1,
    initial_l2,
    initial_alpha),
  lr=lr,max_iter=max_iter,
  lb=c(rep(0.1,5),0.01),ub=c(rep(Inf,5),0.99))



  opt_alpha_helm[i]=opt_par[6]

}

col=brewer.pal(4,"Dark2")
plot(
  ALPHA, opt_alpha, type = "b", pch = 19, col = col[1],
  ylim = c(0,1),
  xlab = expression(alpha), ylab = "predicted",
  xaxt = "n", yaxt = "n", bty = "n", cex.axis = 1.2, cex.lab = 1.2, cex.main = 1.5
)

for (k in ALPHA) {
        segments(x0 = k, y0 = -2, x1 = k, y1 = 12, col = "gray", lty = "dotted")
    }
    
    # Horizontal gridlines limited to y-axis range
    for (y in c(opt_alpha, opt_alpha_helm)) {
        segments(x0 = 0, y0 = y, x1 = max(ALPHA), y1 = y, col = "gray", lty = "dotted")
    }
# Add the second group
lines(ALPHA,opt_alpha_helm, type = "b", pch = 19, col = col[2])
abline(a = 0, b = 1, lty = 2)
# Add axis labels
axis(1, at = ALPHA, labels = ALPHA, cex.axis = 1.2)
axis(2, las = 1, cex.axis = 1.2)

legend("bottomright",c("SE mixture","Helmholtz mixture"),col=col,lty=1)

Experiment 3: Dipole moment estimation

Data loading

X_names=c("X.X_O","X.Y_O","X.Z_O","X.X_H1","X.Y_H1","X.Z_H1",
          "X.X_H2","X.Y_H2","X.Z_H2")
Y_names=c("Y.X_DIPOLE_MP2","Y.Y_DIPOLE_MP2","Y.Z_DIPOLE_MP2")

data = readLines("dipole moment data/original_struct.xyz")

X_DIPOLE_MP2 = c()
Y_DIPOLE_MP2 = c()
Z_DIPOLE_MP2 = c()
X_O = c()
Y_O = c()
Z_O = c()
X_H1 = c()
Y_H1 = c()
Z_H1 = c()
X_H2 = c()
Y_H2 = c()
Z_H2 = c()

for (i in 1:length(data)) {
  
  if (grepl("X_DIPOLE_MP2", data[i])) {
    X_DIPOLE_MP2 = append(X_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i], "=| "))[4]))
    Y_DIPOLE_MP2 = append(Y_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+1], "=| "))[4]))
    Z_DIPOLE_MP2 = append(Z_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+2], "=| "))[4]))
    
    loc1=as.numeric(unlist(strsplit(data[i+12], " ")))
    loc1=loc1[!is.na(loc1)]
    X_O = append(X_O, loc1[1])
    Y_O = append(Y_O, loc1[2])
    Z_O = append(Z_O, loc1[3])
    
    loc2= as.numeric(unlist(strsplit(data[i+13], " ")))
    loc2=loc2[!is.na(loc2)]
    X_H1 = append(X_H1,loc2[1])
    Y_H1 = append(Y_H1, loc2[2])
    Z_H1 = append(Z_H1, loc2[3])
    
    loc3=as.numeric(unlist(strsplit(data[i+14], " ")))
    loc3=loc3[!is.na(loc3)]
    
    X_H2 = append(X_H2, loc3[1])
    Y_H2 = append(Y_H2, loc3[2])
    Z_H2 = append(Z_H2, loc3[3])
  }
}

# Combine the vectors into matrices
Y = cbind(X_DIPOLE_MP2, Y_DIPOLE_MP2, Z_DIPOLE_MP2)
X = cbind(X_O, Y_O, Z_O, X_H1, Y_H1, Z_H1, X_H2, Y_H2, Z_H2)

original_struct=data.frame(Y=Y,X=X)
# Print or use the matrices as needed

data = readLines("dipole moment data/original_struct.xyz")

X_DIPOLE_MP2 = c()
Y_DIPOLE_MP2 = c()
Z_DIPOLE_MP2 = c()
X_O = c()
Y_O = c()
Z_O = c()
X_H1 = c()
Y_H1 = c()
Z_H1 = c()
X_H2 = c()
Y_H2 = c()
Z_H2 = c()

for (i in 1:length(data)) {
  
  if (grepl("X_DIPOLE_MP2", data[i])) {
    X_DIPOLE_MP2 = append(X_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i], "=| "))[4]))
    Y_DIPOLE_MP2 = append(Y_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+1], "=| "))[4]))
    Z_DIPOLE_MP2 = append(Z_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+2], "=| "))[4]))
    
    loc1=as.numeric(unlist(strsplit(data[i+12], " ")))
    loc1=loc1[!is.na(loc1)]
    X_O = append(X_O, loc1[1])
    Y_O = append(Y_O, loc1[2])
    Z_O = append(Z_O, loc1[3])
    
    loc2= as.numeric(unlist(strsplit(data[i+13], " ")))
    loc2=loc2[!is.na(loc2)]
    X_H1 = append(X_H1,loc2[1])
    Y_H1 = append(Y_H1, loc2[2])
    Z_H1 = append(Z_H1, loc2[3])
    
    loc3=as.numeric(unlist(strsplit(data[i+14], " ")))
    loc3=loc3[!is.na(loc3)]
    
    X_H2 = append(X_H2, loc3[1])
    Y_H2 = append(Y_H2, loc3[2])
    Z_H2 = append(Z_H2, loc3[3])
  }
}

# Combine the vectors into matrices
Y = cbind(X_DIPOLE_MP2, Y_DIPOLE_MP2, Z_DIPOLE_MP2)
X = cbind(X_O, Y_O, Z_O, X_H1, Y_H1, Z_H1, X_H2, Y_H2, Z_H2)

original_struct=data.frame(Y=Y,X=X)

data = readLines("dipole moment data/rot_0.xyz")

X_DIPOLE_MP2 = c()
Y_DIPOLE_MP2 = c()
Z_DIPOLE_MP2 = c()
X_O = c()
Y_O = c()
Z_O = c()
X_H1 = c()
Y_H1 = c()
Z_H1 = c()
X_H2 = c()
Y_H2 = c()
Z_H2 = c()

for (i in 1:length(data)) {
  
  if (grepl("X_DIPOLE_MP2", data[i])) {
    X_DIPOLE_MP2 = append(X_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i], "=| "))[4]))
    Y_DIPOLE_MP2 = append(Y_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+1], "=| "))[4]))
    Z_DIPOLE_MP2 = append(Z_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+2], "=| "))[4]))
    
    loc1=as.numeric(unlist(strsplit(data[i+12], " ")))
    loc1=loc1[!is.na(loc1)]
    X_O = append(X_O, loc1[1])
    Y_O = append(Y_O, loc1[2])
    Z_O = append(Z_O, loc1[3])
    
    loc2= as.numeric(unlist(strsplit(data[i+13], " ")))
    loc2=loc2[!is.na(loc2)]
    X_H1 = append(X_H1,loc2[1])
    Y_H1 = append(Y_H1, loc2[2])
    Z_H1 = append(Z_H1, loc2[3])
    
    loc3=as.numeric(unlist(strsplit(data[i+14], " ")))
    loc3=loc3[!is.na(loc3)]
    
    X_H2 = append(X_H2, loc3[1])
    Y_H2 = append(Y_H2, loc3[2])
    Z_H2 = append(Z_H2, loc3[3])
  }
}

# Combine the vectors into matrices
Y = cbind(X_DIPOLE_MP2, Y_DIPOLE_MP2, Z_DIPOLE_MP2)
X = cbind(X_O, Y_O, Z_O, X_H1, Y_H1, Z_H1, X_H2, Y_H2, Z_H2)

rot_0=data.frame(Y=Y,X=X)



data = readLines("dipole moment data/rot_1.xyz")

X_DIPOLE_MP2 = c()
Y_DIPOLE_MP2 = c()
Z_DIPOLE_MP2 = c()
X_O = c()
Y_O = c()
Z_O = c()
X_H1 = c()
Y_H1 = c()
Z_H1 = c()
X_H2 = c()
Y_H2 = c()
Z_H2 = c()

for (i in 1:length(data)) {
  
  if (grepl("X_DIPOLE_MP2", data[i])) {
    X_DIPOLE_MP2 = append(X_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i], "=| "))[4]))
    Y_DIPOLE_MP2 = append(Y_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+1], "=| "))[4]))
    Z_DIPOLE_MP2 = append(Z_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+2], "=| "))[4]))
    
    loc1=as.numeric(unlist(strsplit(data[i+12], " ")))
    loc1=loc1[!is.na(loc1)]
    X_O = append(X_O, loc1[1])
    Y_O = append(Y_O, loc1[2])
    Z_O = append(Z_O, loc1[3])
    
    loc2= as.numeric(unlist(strsplit(data[i+13], " ")))
    loc2=loc2[!is.na(loc2)]
    X_H1 = append(X_H1,loc2[1])
    Y_H1 = append(Y_H1, loc2[2])
    Z_H1 = append(Z_H1, loc2[3])
    
    loc3=as.numeric(unlist(strsplit(data[i+14], " ")))
    loc3=loc3[!is.na(loc3)]
    
    X_H2 = append(X_H2, loc3[1])
    Y_H2 = append(Y_H2, loc3[2])
    Z_H2 = append(Z_H2, loc3[3])
  }
}

# Combine the vectors into matrices
Y = cbind(X_DIPOLE_MP2, Y_DIPOLE_MP2, Z_DIPOLE_MP2)
X = cbind(X_O, Y_O, Z_O, X_H1, Y_H1, Z_H1, X_H2, Y_H2, Z_H2)

rot_1=data.frame(Y=Y,X=X)


data = readLines("dipole moment data/rot_2.xyz")

X_DIPOLE_MP2 = c()
Y_DIPOLE_MP2 = c()
Z_DIPOLE_MP2 = c()
X_O = c()
Y_O = c()
Z_O = c()
X_H1 = c()
Y_H1 = c()
Z_H1 = c()
X_H2 = c()
Y_H2 = c()
Z_H2 = c()

for (i in 1:length(data)) {
  
  if (grepl("X_DIPOLE_MP2", data[i])) {
    X_DIPOLE_MP2 = append(X_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i], "=| "))[4]))
    Y_DIPOLE_MP2 = append(Y_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+1], "=| "))[4]))
    Z_DIPOLE_MP2 = append(Z_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+2], "=| "))[4]))
    
    loc1=as.numeric(unlist(strsplit(data[i+12], " ")))
    loc1=loc1[!is.na(loc1)]
    X_O = append(X_O, loc1[1])
    Y_O = append(Y_O, loc1[2])
    Z_O = append(Z_O, loc1[3])
    
    loc2= as.numeric(unlist(strsplit(data[i+13], " ")))
    loc2=loc2[!is.na(loc2)]
    X_H1 = append(X_H1,loc2[1])
    Y_H1 = append(Y_H1, loc2[2])
    Z_H1 = append(Z_H1, loc2[3])
    
    loc3=as.numeric(unlist(strsplit(data[i+14], " ")))
    loc3=loc3[!is.na(loc3)]
    
    X_H2 = append(X_H2, loc3[1])
    Y_H2 = append(Y_H2, loc3[2])
    Z_H2 = append(Z_H2, loc3[3])
  }
}

# Combine the vectors into matrices
Y = cbind(X_DIPOLE_MP2, Y_DIPOLE_MP2, Z_DIPOLE_MP2)
X = cbind(X_O, Y_O, Z_O, X_H1, Y_H1, Z_H1, X_H2, Y_H2, Z_H2)

rot_2=data.frame(Y=Y,X=X)



data = readLines("dipole moment data/rot_3.xyz")

X_DIPOLE_MP2 = c()
Y_DIPOLE_MP2 = c()
Z_DIPOLE_MP2 = c()
X_O = c()
Y_O = c()
Z_O = c()
X_H1 = c()
Y_H1 = c()
Z_H1 = c()
X_H2 = c()
Y_H2 = c()
Z_H2 = c()

for (i in 1:length(data)) {
  
  if (grepl("X_DIPOLE_MP2", data[i])) {
    X_DIPOLE_MP2 = append(X_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i], "=| "))[4]))
    Y_DIPOLE_MP2 = append(Y_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+1], "=| "))[4]))
    Z_DIPOLE_MP2 = append(Z_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+2], "=| "))[4]))
    
    loc1=as.numeric(unlist(strsplit(data[i+12], " ")))
    loc1=loc1[!is.na(loc1)]
    X_O = append(X_O, loc1[1])
    Y_O = append(Y_O, loc1[2])
    Z_O = append(Z_O, loc1[3])
    
    loc2= as.numeric(unlist(strsplit(data[i+13], " ")))
    loc2=loc2[!is.na(loc2)]
    X_H1 = append(X_H1,loc2[1])
    Y_H1 = append(Y_H1, loc2[2])
    Z_H1 = append(Z_H1, loc2[3])
    
    loc3=as.numeric(unlist(strsplit(data[i+14], " ")))
    loc3=loc3[!is.na(loc3)]
    
    X_H2 = append(X_H2, loc3[1])
    Y_H2 = append(Y_H2, loc3[2])
    Z_H2 = append(Z_H2, loc3[3])
  }
}

# Combine the vectors into matrices
Y = cbind(X_DIPOLE_MP2, Y_DIPOLE_MP2, Z_DIPOLE_MP2)
X = cbind(X_O, Y_O, Z_O, X_H1, Y_H1, Z_H1, X_H2, Y_H2, Z_H2)

rot_3=data.frame(Y=Y,X=X)

data = readLines("dipole moment data/rot_4.xyz")

X_DIPOLE_MP2 = c()
Y_DIPOLE_MP2 = c()
Z_DIPOLE_MP2 = c()
X_O = c()
Y_O = c()
Z_O = c()
X_H1 = c()
Y_H1 = c()
Z_H1 = c()
X_H2 = c()
Y_H2 = c()
Z_H2 = c()

for (i in 1:length(data)) {
  
  if (grepl("X_DIPOLE_MP2", data[i])) {
    X_DIPOLE_MP2 = append(X_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i], "=| "))[4]))
    Y_DIPOLE_MP2 = append(Y_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+1], "=| "))[4]))
    Z_DIPOLE_MP2 = append(Z_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+2], "=| "))[4]))
    
    loc1=as.numeric(unlist(strsplit(data[i+12], " ")))
    loc1=loc1[!is.na(loc1)]
    X_O = append(X_O, loc1[1])
    Y_O = append(Y_O, loc1[2])
    Z_O = append(Z_O, loc1[3])
    
    loc2= as.numeric(unlist(strsplit(data[i+13], " ")))
    loc2=loc2[!is.na(loc2)]
    X_H1 = append(X_H1,loc2[1])
    Y_H1 = append(Y_H1, loc2[2])
    Z_H1 = append(Z_H1, loc2[3])
    
    loc3=as.numeric(unlist(strsplit(data[i+14], " ")))
    loc3=loc3[!is.na(loc3)]
    
    X_H2 = append(X_H2, loc3[1])
    Y_H2 = append(Y_H2, loc3[2])
    Z_H2 = append(Z_H2, loc3[3])
  }
}

# Combine the vectors into matrices
Y = cbind(X_DIPOLE_MP2, Y_DIPOLE_MP2, Z_DIPOLE_MP2)
X = cbind(X_O, Y_O, Z_O, X_H1, Y_H1, Z_H1, X_H2, Y_H2, Z_H2)

rot_4=data.frame(Y=Y,X=X)


data = readLines("dipole moment data/rot_5.xyz")

X_DIPOLE_MP2 = c()
Y_DIPOLE_MP2 = c()
Z_DIPOLE_MP2 = c()
X_O = c()
Y_O = c()
Z_O = c()
X_H1 = c()
Y_H1 = c()
Z_H1 = c()
X_H2 = c()
Y_H2 = c()
Z_H2 = c()

for (i in 1:length(data)) {
  
  if (grepl("X_DIPOLE_MP2", data[i])) {
    X_DIPOLE_MP2 = append(X_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i], "=| "))[4]))
    Y_DIPOLE_MP2 = append(Y_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+1], "=| "))[4]))
    Z_DIPOLE_MP2 = append(Z_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+2], "=| "))[4]))
    
    loc1=as.numeric(unlist(strsplit(data[i+12], " ")))
    loc1=loc1[!is.na(loc1)]
    X_O = append(X_O, loc1[1])
    Y_O = append(Y_O, loc1[2])
    Z_O = append(Z_O, loc1[3])
    
    loc2= as.numeric(unlist(strsplit(data[i+13], " ")))
    loc2=loc2[!is.na(loc2)]
    X_H1 = append(X_H1,loc2[1])
    Y_H1 = append(Y_H1, loc2[2])
    Z_H1 = append(Z_H1, loc2[3])
    
    loc3=as.numeric(unlist(strsplit(data[i+14], " ")))
    loc3=loc3[!is.na(loc3)]
    
    X_H2 = append(X_H2, loc3[1])
    Y_H2 = append(Y_H2, loc3[2])
    Z_H2 = append(Z_H2, loc3[3])
  }
}

# Combine the vectors into matrices
Y = cbind(X_DIPOLE_MP2, Y_DIPOLE_MP2, Z_DIPOLE_MP2)
X = cbind(X_O, Y_O, Z_O, X_H1, Y_H1, Z_H1, X_H2, Y_H2, Z_H2)


rot_5=data.frame(Y=Y,X=X)



data = readLines("dipole moment data/rot_6.xyz")

X_DIPOLE_MP2 = c()
Y_DIPOLE_MP2 = c()
Z_DIPOLE_MP2 = c()
X_O = c()
Y_O = c()
Z_O = c()
X_H1 = c()
Y_H1 = c()
Z_H1 = c()
X_H2 = c()
Y_H2 = c()
Z_H2 = c()

for (i in 1:length(data)) {
  
  if (grepl("X_DIPOLE_MP2", data[i])) {
    X_DIPOLE_MP2 = append(X_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i], "=| "))[4]))
    Y_DIPOLE_MP2 = append(Y_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+1], "=| "))[4]))
    Z_DIPOLE_MP2 = append(Z_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+2], "=| "))[4]))
    
    loc1=as.numeric(unlist(strsplit(data[i+12], " ")))
    loc1=loc1[!is.na(loc1)]
    X_O = append(X_O, loc1[1])
    Y_O = append(Y_O, loc1[2])
    Z_O = append(Z_O, loc1[3])
    
    loc2= as.numeric(unlist(strsplit(data[i+13], " ")))
    loc2=loc2[!is.na(loc2)]
    X_H1 = append(X_H1,loc2[1])
    Y_H1 = append(Y_H1, loc2[2])
    Z_H1 = append(Z_H1, loc2[3])
    
    loc3=as.numeric(unlist(strsplit(data[i+14], " ")))
    loc3=loc3[!is.na(loc3)]
    
    X_H2 = append(X_H2, loc3[1])
    Y_H2 = append(Y_H2, loc3[2])
    Z_H2 = append(Z_H2, loc3[3])
  }
}

# Combine the vectors into matrices
Y = cbind(X_DIPOLE_MP2, Y_DIPOLE_MP2, Z_DIPOLE_MP2)
X = cbind(X_O, Y_O, Z_O, X_H1, Y_H1, Z_H1, X_H2, Y_H2, Z_H2)

rot_6=data.frame(Y=Y,X=X)



data = readLines("dipole moment data/rot_7.xyz")

X_DIPOLE_MP2 = c()
Y_DIPOLE_MP2 = c()
Z_DIPOLE_MP2 = c()
X_O = c()
Y_O = c()
Z_O = c()
X_H1 = c()
Y_H1 = c()
Z_H1 = c()
X_H2 = c()
Y_H2 = c()
Z_H2 = c()

for (i in 1:length(data)) {
  
  if (grepl("X_DIPOLE_MP2", data[i])) {
    X_DIPOLE_MP2 = append(X_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i], "=| "))[4]))
    Y_DIPOLE_MP2 = append(Y_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+1], "=| "))[4]))
    Z_DIPOLE_MP2 = append(Z_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+2], "=| "))[4]))
    
    loc1=as.numeric(unlist(strsplit(data[i+12], " ")))
    loc1=loc1[!is.na(loc1)]
    X_O = append(X_O, loc1[1])
    Y_O = append(Y_O, loc1[2])
    Z_O = append(Z_O, loc1[3])
    
    loc2= as.numeric(unlist(strsplit(data[i+13], " ")))
    loc2=loc2[!is.na(loc2)]
    X_H1 = append(X_H1,loc2[1])
    Y_H1 = append(Y_H1, loc2[2])
    Z_H1 = append(Z_H1, loc2[3])
    
    loc3=as.numeric(unlist(strsplit(data[i+14], " ")))
    loc3=loc3[!is.na(loc3)]
    
    X_H2 = append(X_H2, loc3[1])
    Y_H2 = append(Y_H2, loc3[2])
    Z_H2 = append(Z_H2, loc3[3])
  }
}

# Combine the vectors into matrices
Y = cbind(X_DIPOLE_MP2, Y_DIPOLE_MP2, Z_DIPOLE_MP2)
X = cbind(X_O, Y_O, Z_O, X_H1, Y_H1, Z_H1, X_H2, Y_H2, Z_H2)

rot_7=data.frame(Y=Y,X=X)


data = readLines("dipole moment data/rot_8.xyz")

X_DIPOLE_MP2 = c()
Y_DIPOLE_MP2 = c()
Z_DIPOLE_MP2 = c()
X_O = c()
Y_O = c()
Z_O = c()
X_H1 = c()
Y_H1 = c()
Z_H1 = c()
X_H2 = c()
Y_H2 = c()
Z_H2 = c()

for (i in 1:length(data)) {
  
  if (grepl("X_DIPOLE_MP2", data[i])) {
    X_DIPOLE_MP2 = append(X_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i], "=| "))[4]))
    Y_DIPOLE_MP2 = append(Y_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+1], "=| "))[4]))
    Z_DIPOLE_MP2 = append(Z_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+2], "=| "))[4]))
    
    loc1=as.numeric(unlist(strsplit(data[i+12], " ")))
    loc1=loc1[!is.na(loc1)]
    X_O = append(X_O, loc1[1])
    Y_O = append(Y_O, loc1[2])
    Z_O = append(Z_O, loc1[3])
    
    loc2= as.numeric(unlist(strsplit(data[i+13], " ")))
    loc2=loc2[!is.na(loc2)]
    X_H1 = append(X_H1,loc2[1])
    Y_H1 = append(Y_H1, loc2[2])
    Z_H1 = append(Z_H1, loc2[3])
    
    loc3=as.numeric(unlist(strsplit(data[i+14], " ")))
    loc3=loc3[!is.na(loc3)]
    
    X_H2 = append(X_H2, loc3[1])
    Y_H2 = append(Y_H2, loc3[2])
    Z_H2 = append(Z_H2, loc3[3])
  }
}

# Combine the vectors into matrices
Y = cbind(X_DIPOLE_MP2, Y_DIPOLE_MP2, Z_DIPOLE_MP2)
X = cbind(X_O, Y_O, Z_O, X_H1, Y_H1, Z_H1, X_H2, Y_H2, Z_H2)

rot_8=data.frame(Y=Y,X=X)


data = readLines("dipole moment data/rot_9.xyz")

X_DIPOLE_MP2 = c()
Y_DIPOLE_MP2 = c()
Z_DIPOLE_MP2 = c()
X_O = c()
Y_O = c()
Z_O = c()
X_H1 = c()
Y_H1 = c()
Z_H1 = c()
X_H2 = c()
Y_H2 = c()
Z_H2 = c()

for (i in 1:length(data)) {
  
  if (grepl("X_DIPOLE_MP2", data[i])) {
    X_DIPOLE_MP2 = append(X_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i], "=| "))[4]))
    Y_DIPOLE_MP2 = append(Y_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+1], "=| "))[4]))
    Z_DIPOLE_MP2 = append(Z_DIPOLE_MP2, as.numeric(unlist(strsplit(data[i+2], "=| "))[4]))
    
    loc1=as.numeric(unlist(strsplit(data[i+12], " ")))
    loc1=loc1[!is.na(loc1)]
    X_O = append(X_O, loc1[1])
    Y_O = append(Y_O, loc1[2])
    Z_O = append(Z_O, loc1[3])
    
    loc2= as.numeric(unlist(strsplit(data[i+13], " ")))
    loc2=loc2[!is.na(loc2)]
    X_H1 = append(X_H1,loc2[1])
    Y_H1 = append(Y_H1, loc2[2])
    Z_H1 = append(Z_H1, loc2[3])
    
    loc3=as.numeric(unlist(strsplit(data[i+14], " ")))
    loc3=loc3[!is.na(loc3)]
    
    X_H2 = append(X_H2, loc3[1])
    Y_H2 = append(Y_H2, loc3[2])
    Z_H2 = append(Z_H2, loc3[3])
  }
}

# Combine the vectors into matrices
Y = cbind(X_DIPOLE_MP2, Y_DIPOLE_MP2, Z_DIPOLE_MP2)
X = cbind(X_O, Y_O, Z_O, X_H1, Y_H1, Z_H1, X_H2, Y_H2, Z_H2)


rot_9=data.frame(Y=Y,X=X)

Removal of duplicates

projection_fund=function(x1){
  norm=function(x){sum(x^2)^.5}
  if(norm(x1[4:6]-x1[1:3])>=norm(x1[7:9]-x1[1:3])){
    x_H_bar_1=x1[4:6]-x1[1:3]
    x_H_bar_2=x1[7:9]-x1[1:3]
    r1=norm(x_H_bar_1)
    r2=norm(x_H_bar_2)
  }else{
    x_H_bar_1=x1[7:9]-x1[1:3]
    x_H_bar_2=x1[4:6]-x1[1:3]
    r1=norm(x_H_bar_1)
    r2=norm(x_H_bar_2)
  }
  
  theta1=atan2(x_H_bar_1[1],x_H_bar_1[2])
  x_H_tilde_1=c(0,sin(theta1)*x_H_bar_1[1]+cos(theta1)*x_H_bar_1[2],x_H_bar_1[3])
  phi1=-atan2(x_H_tilde_1[3],x_H_tilde_1[2])
  
  psi_1=matrix(c(cos(theta1),-sin(theta1),0,sin(theta1),cos(theta1),0,0,0,1),
               nrow=3,byrow = 1)
  psi_2=matrix(c(1,0,0,0,cos(phi1),-sin(phi1),0,sin(phi1),cos(phi1)),
               nrow=3,byrow = 1)
  
  x_H_tilde_2=psi_2%*%psi_1%*%x_H_bar_2
  
  phi2=-atan2(x_H_tilde_2[3],x_H_tilde_2[1])
  psi_3=matrix(c(cos(phi2),0,-sin(phi2),0,1,0,sin(phi2),0,cos(phi2)),
               nrow=3,byrow = 1)
  
  A1=c(r1,(psi_3%*%x_H_tilde_2)[1],(psi_3%*%x_H_tilde_2)[2])
  return(A1)
}
X=as.matrix(original_struct[X_names])
Y=as.matrix(original_struct[Y_names])

A=t(apply(X,1,projection_fund))
#A=matrix(rnorm(3*3641,mean(A),sd(A)),ncol=3)


a=diag(1,nrow(A),nrow(A))
for(i in 1:nrow(A)){
  for(j in 1:nrow(A)){
    if(i!=j){a[i,j]=sum((A[i,]-A[j,])^2)^.5}
  }
}

### DUPLICATES CHECK
threshold=1e-10
duplicates=c()
for(i in 1:1261){
  ind=which(a[i,]<threshold)
  duplicates=c(duplicates,ind[ind>i])
}

Data visualisation

rgl::setupKnitr()

# Generate 3D plot

dat=list(original_struct,rot_0,rot_1,rot_2,rot_3,
         rot_4,rot_5,rot_6,rot_7,rot_8,rot_9)

rot_ind=sample(10,1261,replace = TRUE)
swap_ind=sample(2,1261,replace = TRUE)


X=(t(sapply(1:1261,function(i){
  if(swap_ind[i]==1){
  as.matrix(dat[[rot_ind[i]]][X_names])[i,]
}else{
as.matrix(dat[[rot_ind[i]]][X_names])[i,c(1:3,7:9,4:6)]
}
}))+t(sapply(1:1261,function(i){
  a=runif(3,-2,2)
  rep(a,3)
})))[-duplicates,]


Y=t(sapply(1:1261,function(i){
  as.matrix(dat[[rot_ind[i]]][Y_names])[i,]
}))[-duplicates,]


X_O=X[,1:3];X_H1=X[,4:6];X_H2=X[,7:9]

osize=2.5;size=2.5
open3d(silent = TRUE)
par3d(windowRect = c(100, 100, 612, 612))

plot3d(X_O,xlim=range(X_O[,1]),
       ylim=range(X_O[,2]),
       zlim=range(X_O[,3]),type='p',cex=.1,main="",col=2,
       
       xlab = "", ylab = "", zlab = "",size = osize)


plot3d(X_H1,xlim=range(X_H1[,1]),
       ylim=range(X_H1[,2]),
       zlim=range(X_H1[,3]),type='p',cex=.1,main="Data",
       col="lightblue",add = 1,size = size)

plot3d(X_H2,xlim=range(X_H2[,1]),
       ylim=range(X_H2[,2]),
       zlim=range(X_H2[,3]),type='p',cex=.1,main="Data",
       col="blue",add = 1,size = size)



plot3d(Y,xlim=range(Y[,1]),
       ylim=range(Y[,2]),
       zlim=range(Y[,3]),type='p',cex=.1,main="Data",
       col="orange",add = 1,size = size)
legend3d("top",
         legend = c("Oxygen", "Hydrogen 1", "Hydrogen 2", "Dipole moment"), 
         pch = 19, 
         col = c(2, "lightblue", "blue", "orange"), 
         cex = 1)
rgl.viewpoint(theta = 90, phi = 30)
rglwidget()

Covariance matrices

fund_cov_mat=function(x1,x2,l1,sigma1,l2,sigma2,l3,sigma3){
  norm=function(x){sum(x^2)^.5}
  if(norm(x1[4:6]-x1[1:3])>=norm(x1[7:9]-x1[1:3])){
    x_H_bar_1=x1[4:6]-x1[1:3]
    x_H_bar_2=x1[7:9]-x1[1:3]
    r1=norm(x_H_bar_1)
    r2=norm(x_H_bar_2)
  }else{
    x_H_bar_1=x1[7:9]-x1[1:3]
    x_H_bar_2=x1[4:6]-x1[1:3]
    r1=norm(x_H_bar_1)
    r2=norm(x_H_bar_2)
  }
  
  theta1=atan2(x_H_bar_1[1],x_H_bar_1[2])
  x_H_tilde_1=c(0,sin(theta1)*x_H_bar_1[1]+cos(theta1)*x_H_bar_1[2],x_H_bar_1[3])
  phi1=-atan2(x_H_tilde_1[3],x_H_tilde_1[2])
  
  psi_1=matrix(c(cos(theta1),-sin(theta1),0,sin(theta1),cos(theta1),0,0,0,1),
               nrow=3,byrow = 1)
  psi_2=matrix(c(1,0,0,0,cos(phi1),-sin(phi1),0,sin(phi1),cos(phi1)),
               nrow=3,byrow = 1)
  
  x_H_tilde_2=psi_2%*%psi_1%*%x_H_bar_2
  
  phi2=-atan2(x_H_tilde_2[3],x_H_tilde_2[1])
  psi_3=matrix(c(cos(phi2),0,-sin(phi2),0,1,0,sin(phi2),0,cos(phi2)),
               nrow=3,byrow = 1)
  
  A1=c(r1,(psi_3%*%x_H_tilde_2)[1],(psi_3%*%x_H_tilde_2)[2])
  
  
  if(norm(x2[4:6]-x2[1:3])>=norm(x2[7:9]-x2[1:3])){
    x_H2_bar_1=x2[4:6]-x2[1:3]
    x_H2_bar_2=x2[7:9]-x2[1:3]
    r1_2=norm(x_H2_bar_1)
    r2_2=norm(x_H2_bar_2)
  }else{
    x_H2_bar_1=x2[7:9]-x2[1:3]
    x_H2_bar_2=x2[4:6]-x2[1:3]
    r1_2=norm(x_H2_bar_1)
    r2_2=norm(x_H2_bar_2)
  }
  
  theta1_2=atan2(x_H2_bar_1[1],x_H2_bar_1[2])
  x_H2_tilde_1=c(0,sin(theta1_2)*x_H2_bar_1[1]+cos(theta1_2)*x_H2_bar_1[2],x_H2_bar_1[3])
  phi1_2=-atan2(x_H2_tilde_1[3],x_H2_tilde_1[2])
  
  psi_1_2=matrix(c(cos(theta1_2),-sin(theta1_2),0,sin(theta1_2),cos(theta1_2),0,0,0,1),
                 nrow=3,byrow = 1)
  psi_2_2=matrix(c(1,0,0,0,cos(phi1_2),-sin(phi1_2),0,sin(phi1_2),cos(phi1_2)),
                 nrow=3,byrow = 1)
  
  x_H2_tilde_2=psi_2_2%*%psi_1_2%*%x_H2_bar_2
  
  phi2_2=-atan2(x_H2_tilde_2[3],x_H2_tilde_2[1])
  psi_3_2=matrix(c(cos(phi2_2),0,-sin(phi2_2),0,1,0,sin(phi2_2),0,cos(phi2_2)),
                 nrow=3,byrow = 1)
  
  A2=c(r1_2,(psi_3_2%*%x_H2_tilde_2)[1],(psi_3_2%*%x_H2_tilde_2)[2])
  
  t(psi_3%*%psi_2%*%psi_1)%*%(exp(diag(c(-(norm(A1-A2))^2/(2*l1^2),
                                         -(norm(A1-A2))^2/(2*l2^2),
                                         -(norm(A1-A2))^2/(2*l3^2))))*
                                diag(c(sigma1^2,sigma2^2,sigma3^2)))%*%psi_3_2%*%psi_2_2%*%psi_1_2
  
  
}

cross_fund_cov_mat = function(x1, x2, l1, sigma1, l2, sigma2, l3, sigma3) {
  norm = function(x) {
    sqrt(sum(x^2))
  }
  
  n_x1 = ifelse(length(as.matrix(x1)) == 9, 1, nrow(x1))
  n_x2 = ifelse(length(as.matrix(x2)) == 9, 1, nrow(x2))
  cov = matrix(0, nrow = 3 * n_x1, ncol = 3 * n_x2)
  
  for (i in 1:n_x1) {
    for (j in 1:n_x2) {
      x1_i = if (n_x1 == 1) x1 else x1[i, ]
      x2_j = if (n_x2 == 1) x2 else x2[j, ]
      
      cov1 = fund_cov_mat(x1_i, x2_j, l1, sigma1, l2, sigma2, l3, sigma3)
      
      cov[i, j] = cov1[1, 1]
      cov[i + n_x1, j + n_x2] = cov1[2, 2]
      cov[i + 2 * n_x1, j + 2 * n_x2] = cov1[3, 3]
      
      cov[i + n_x1, j] = cov1[2, 1]
      cov[i + 2 * n_x1, j] = cov1[3, 1]
      cov[i, j + n_x2] = cov1[1, 2]
      cov[i, j + 2 * n_x2] = cov1[1, 3]
      cov[i + 2 * n_x1, j + n_x2] = cov1[3, 2]
      cov[i + n_x1, j + 2 * n_x2] = cov1[2, 3]
    }
  }
  
  return(cov)
}

cov_mat3d = function(x1, x2, l1, sigma1, l2, sigma2, l3, sigma3) {
  norm = function(x) {
    sqrt(sum(x^2))
  }
  
  diag(c(
    sigma1^2 * exp(-norm(x1 - x2)^2 / (2 * l1^2)),
    sigma2^2 * exp(-norm(x1 - x2)^2 / (2 * l2^2)),
    sigma3^2 * exp(-norm(x1 - x2)^2 / (2 * l3^2))
  ))
}

cross_cov_mat3d = function(x1, x2, l1, sigma1, l2, sigma2, l3, sigma3) {
  n_x1 = ifelse(length(as.matrix(x1)) == 9, 1, nrow(x1))
  n_x2 = ifelse(length(as.matrix(x2)) == 9, 1, nrow(x2))
  cov = matrix(0, nrow = 3 * n_x1, ncol = 3 * n_x2)
  
  for (i in 1:n_x1) {
    for (j in 1:n_x2) {
      x1_i = x1[i, ]
      x2_j = x2[j, ]
      
      cov1 = cov_mat3d(x1_i, x2_j, l1, sigma1, l2, sigma2, l3, sigma3)
      
      cov[i, j] = cov1[1, 1]
      cov[i + n_x1, j + n_x2] = cov1[2, 2]
      cov[i + 2 * n_x1, j + 2 * n_x2] = cov1[3, 3]
      
      cov[i + n_x1, j] = cov1[2, 1]
      cov[i + 2 * n_x1, j] = cov1[3, 1]
      cov[i, j + n_x2] = cov1[1, 2]
      cov[i, j + 2 * n_x2] = cov1[1, 3]
      cov[i + 2 * n_x1, j + n_x2] = cov1[3, 2]
      cov[i + n_x1, j + 2 * n_x2] = cov1[2, 3]
    }
  }
  
  return(cov)
}

cov_mat3d_2 = function(x1, x2, l1, sigma1, l2, sigma2, l3, sigma3) {
  norm = function(x) {
    sqrt(sum(x^2))
  }
  
  x1 = x1 - rep(x1[1:3], 3)
  x2 = x2 - rep(x2[1:3], 3)
  
  diag(c(
    sigma1^2 * exp(-norm(x1 - x2)^2 / (2 * l1^2)),
    sigma2^2 * exp(-norm(x1 - x2)^2 / (2 * l2^2)),
    sigma3^2 * exp(-norm(x1 - x2)^2 / (2 * l3^2))
  ))
}

cross_cov_mat3d_2 = function(x1, x2, l1, sigma1, l2, sigma2, l3, sigma3) {
  n_x1 = ifelse(length(as.matrix(x1)) == 9, 1, nrow(x1))
  n_x2 = ifelse(length(as.matrix(x2)) == 9, 1, nrow(x2))
  cov = matrix(0, nrow = 3 * n_x1, ncol = 3 * n_x2)
  
  for (i in 1:n_x1) {
    for (j in 1:n_x2) {
      x1_i = x1[i, ]
      x2_j = x2[j, ]
      
      cov1 = cov_mat3d_2(x1_i, x2_j, l1, sigma1, l2, sigma2, l3, sigma3)
      
      cov[i, j] = cov1[1, 1]
      cov[i + n_x1, j + n_x2] = cov1[2, 2]
      cov[i + 2 * n_x1, j + 2 * n_x2] = cov1[3, 3]
      
      cov[i + n_x1, j] = cov1[2, 1]
      cov[i + 2 * n_x1, j] = cov1[3, 1]
      cov[i, j + n_x2] = cov1[1, 2]
      cov[i, j + 2 * n_x2] = cov1[1, 3]
      cov[i + 2 * n_x1, j + n_x2] = cov1[3, 2]
      cov[i + n_x1, j + 2 * n_x2] = cov1[2, 3]
    }
  }
  
  return(cov)
}

cov_mat3d_3 = function(x1, x2, l1, sigma1, l2, sigma2, l3, sigma3) {
  norm = function(x) {
    sqrt(sum(x^2))
  }
  
  if(norm( (x1 - rep(x1[1:3], 3))[4:6])< norm( (x1 - rep(x1[1:3], 3))[7:9])){
    x1=c(x1[1:3],x1[7:9],x1[4:6])
  }
 if(norm( (x2 - rep(x2[1:3], 3))[4:6])< norm( (x2 - rep(x2[1:3], 3))[7:9])){
    x2=c(x2[1:3],x2[7:9],x2[4:6])
  }
  
  diag(c(
    sigma1^2 * exp(-norm(x1 - x2)^2 / (2 * l1^2)),
    sigma2^2 * exp(-norm(x1 - x2)^2 / (2 * l2^2)),
    sigma3^2 * exp(-norm(x1 - x2)^2 / (2 * l3^2))
  ))
}

cross_cov_mat3d_3 = function(x1, x2, l1, sigma1, l2, sigma2, l3, sigma3) {
  n_x1 = ifelse(length(as.matrix(x1)) == 9, 1, nrow(x1))
  n_x2 = ifelse(length(as.matrix(x2)) == 9, 1, nrow(x2))
  cov = matrix(0, nrow = 3 * n_x1, ncol = 3 * n_x2)
  
  for (i in 1:n_x1) {
    for (j in 1:n_x2) {
      x1_i = x1[i, ]
      x2_j = x2[j, ]
      
      cov1 = cov_mat3d_3(x1_i, x2_j, l1, sigma1, l2, sigma2, l3, sigma3)
      
      cov[i, j] = cov1[1, 1]
      cov[i + n_x1, j + n_x2] = cov1[2, 2]
      cov[i + 2 * n_x1, j + 2 * n_x2] = cov1[3, 3]
      
      cov[i + n_x1, j] = cov1[2, 1]
      cov[i + 2 * n_x1, j] = cov1[3, 1]
      cov[i, j + n_x2] = cov1[1, 2]
      cov[i, j + 2 * n_x2] = cov1[1, 3]
      cov[i + 2 * n_x1, j + n_x2] = cov1[3, 2]
      cov[i + n_x1, j + 2 * n_x2] = cov1[2, 3]
    }
  }
  
  return(cov)
}

cov_mat3d_4 = function(x1, x2, l1, sigma1, l2, sigma2, l3, sigma3) {
  norm = function(x) {
    sqrt(sum(x^2))
  }
  
  x1 = x1 - rep(x1[1:3], 3)
  x2 = x2 - rep(x2[1:3], 3)
  
  if (norm(x1[4:6]) < norm(x1[7:9])) {
    x1 = c(0, 0, 0, x1[7:9], x1[4:6])
  }
  
  if (norm(x2[4:6]) < norm(x2[7:9])) {
    x2 = c(0, 0, 0, x2[7:9], x2[4:6])
  }
  
  diag(c(
    sigma1^2 * exp(-norm(x1 - x2)^2 / (2 * l1^2)),
    sigma2^2 * exp(-norm(x1 - x2)^2 / (2 * l2^2)),
    sigma3^2 * exp(-norm(x1 - x2)^2 / (2 * l3^2))
  ))
}

cross_cov_mat3d_4 = function(x1, x2, l1, sigma1, l2, sigma2, l3, sigma3) {
  cov = matrix(0, 
                ncol = 3 * ifelse(length(as.matrix(x2)) == 9, 1, nrow(x2)),
                nrow = 3 * ifelse(length(as.matrix(x1)) == 9, 1, nrow(x1)))
  
  for (i in 1:(length(as.matrix(x1)) / 9)) {
    for (j in 1:(length(as.matrix(x2)) / 9)) {
      cov1 = cov_mat3d_4(x1[i, ], x2[j, ], l1, sigma1, l2, sigma2, l3, sigma3)
      
      cov[i, j] = cov1[1, 1]
      cov[ifelse(length(as.matrix(x1)) == 9, 1, nrow(x1)) + i,
          ifelse(length(as.matrix(x2)) == 9, 1, nrow(x2)) + j] = cov1[2, 2]
      
      cov[2 * ifelse(length(as.matrix(x1)) == 9, 1, nrow(x1)) + i,
          2 * ifelse(length(as.matrix(x2)) == 9, 1, nrow(x2)) + j] = cov1[3, 3]
      
      cov[ifelse(length(as.matrix(x1)) == 9, 1, nrow(x1)) + i, j] = cov1[2, 1]
      cov[2 * ifelse(length(as.matrix(x1)) == 9, 1, nrow(x1)) + i, j] = cov1[3, 1]
      
      cov[i, ifelse(length(as.matrix(x2)) == 9, 1, nrow(x2)) + j] = cov1[1, 2]
      cov[i, 2 * ifelse(length(as.matrix(x2)) == 9, 1, nrow(x2)) + j] = cov1[1, 3]
      
      cov[2 * ifelse(length(as.matrix(x1)) == 9, 1, nrow(x1)) + i,
          ifelse(length(as.matrix(x2)) == 9, 1, nrow(x2)) + j] = cov1[3, 2]
      
      cov[ifelse(length(as.matrix(x1)) == 9, 1, nrow(x1)) + i,
          2 * ifelse(length(as.matrix(x2)) == 9, 1, nrow(x2)) + j] = cov1[2, 3]
    }
  }
  
  return(cov)
}

Invariance and equivariance checks

x1=X[sample(851,1),];x2=X[sample(851,1),]

translation_vec1=runif(3);translation_vec2=runif(3)

l1=runif(1);sigma1=runif(1);l2=runif(1);sigma2=runif(1);l3=runif(1);sigma3=runif(1)

#two random rotation matrices in SO(3)
theta1=runif(1,0,1000);theta2=runif(1,0,1000);theta3=runif(1,0,1000)
psi_1=matrix(c(cos(theta1),-sin(theta1),0,sin(theta1),cos(theta1),0,0,0,1),
               nrow=3,byrow = 1)
psi_2=matrix(c(1,0,0,0,cos(theta2),-sin(theta2),0,sin(theta2),cos(theta2)),
               nrow=3,byrow = 1)
psi_3=matrix(c(cos(theta3),0,-sin(theta3),0,1,0,sin(theta3),0,cos(theta3)),
               nrow=3,byrow = 1)
rho_1=psi_1%*%psi_2%*%psi_3

theta1=runif(1,0,1000);theta2=runif(1,0,1000);theta3=runif(1,0,1000)
psi_1=matrix(c(cos(theta1),-sin(theta1),0,sin(theta1),cos(theta1),0,0,0,1),
               nrow=3,byrow = 1)
psi_2=matrix(c(1,0,0,0,cos(theta2),-sin(theta2),0,sin(theta2),cos(theta2)),
               nrow=3,byrow = 1)
psi_3=matrix(c(cos(theta3),0,-sin(theta3),0,1,0,sin(theta3),0,cos(theta3)),
               nrow=3,byrow = 1)
rho_2=psi_1%*%psi_2%*%psi_3

Equivariance check

fund_cov_mat(c(rho_1%*%x1[1:3],rho_1%*%x1[4:6],rho_1%*%x1[7:9]),
             c(rho_2%*%x2[1:3],rho_2%*%x2[4:6],rho_2%*%x2[7:9]),
              l1, sigma1, l2, sigma2, l3, sigma3)
##             [,1]       [,2]        [,3]
## [1,]  0.09252729  0.1165679 -0.07464482
## [2,]  0.03680874  0.0456876 -0.02948476
## [3,] -0.08669993 -0.1101810  0.07023649
rho_1%*%fund_cov_mat(x1,x2, l1, sigma1, l2, sigma2, l3, sigma3)%*%t(rho_2)
##             [,1]       [,2]        [,3]
## [1,]  0.09252729  0.1165679 -0.07464482
## [2,]  0.03680874  0.0456876 -0.02948476
## [3,] -0.08669993 -0.1101810  0.07023649
cov_mat3d(c(rho_1%*%x1[1:3],rho_1%*%x1[4:6],rho_1%*%x1[7:9]),
             c(rho_2%*%x2[1:3],rho_2%*%x2[4:6],rho_2%*%x2[7:9]),
              l1, sigma1, l2, sigma2, l3, sigma3)
##      [,1]         [,2]         [,3]
## [1,]    0 0.000000e+00 0.000000e+00
## [2,]    0 1.629445e-16 0.000000e+00
## [3,]    0 0.000000e+00 1.087925e-22
rho_1%*%cov_mat3d(x1,x2, l1, sigma1, l2, sigma2, l3, sigma3)%*%t(rho_2)
##              [,1]         [,2]          [,3]
## [1,] 3.794475e-17 9.950365e-18 -2.497199e-17
## [2,] 2.628631e-17 6.893092e-18 -1.729938e-17
## [3,] 6.839821e-17 1.793615e-17 -4.501380e-17

Translation-invariance

cov_mat3d(x1+translation_vec1,x2+translation_vec2,l1, sigma1, l2, sigma2, l3, sigma3)
##      [,1]        [,2]         [,3]
## [1,]    0 0.00000e+00 0.000000e+00
## [2,]    0 1.24658e-13 0.000000e+00
## [3,]    0 0.00000e+00 4.084417e-19
cov_mat3d(x1,x2,l1, sigma1, l2, sigma2, l3, sigma3)
##      [,1]         [,2]         [,3]
## [1,]    0 0.000000e+00 0.000000e+00
## [2,]    0 1.011258e-16 0.000000e+00
## [3,]    0 0.000000e+00 6.022644e-23
cov_mat3d_2(x1+translation_vec1,x2+translation_vec2,l1, sigma1, l2, sigma2, l3, sigma3)
##      [,1]         [,2]         [,3]
## [1,]    0 0.000000e+00 0.000000e+00
## [2,]    0 1.404891e-05 0.000000e+00
## [3,]    0 0.000000e+00 3.909064e-09
cov_mat3d_2(x1,x2,l1, sigma1, l2, sigma2, l3, sigma3)
##      [,1]         [,2]         [,3]
## [1,]    0 0.000000e+00 0.000000e+00
## [2,]    0 1.404891e-05 0.000000e+00
## [3,]    0 0.000000e+00 3.909064e-09
cov_mat3d_4(x1+translation_vec1,x2+translation_vec2,l1, sigma1, l2, sigma2, l3, sigma3)
##              [,1]       [,2]         [,3]
## [1,] 1.36624e-190 0.00000000 0.000000e+00
## [2,]  0.00000e+00 0.02348352 0.000000e+00
## [3,]  0.00000e+00 0.00000000 3.866915e-05
cov_mat3d_4(x1,x2,l1, sigma1, l2, sigma2, l3, sigma3)
##              [,1]       [,2]         [,3]
## [1,] 1.36624e-190 0.00000000 0.000000e+00
## [2,]  0.00000e+00 0.02348352 0.000000e+00
## [3,]  0.00000e+00 0.00000000 3.866915e-05
fund_cov_mat(x1+translation_vec1,x2+translation_vec2,l1, sigma1, l2, sigma2, l3, sigma3)
##              [,1]        [,2]        [,3]
## [1,]  0.022853296  0.10391926 -0.06172984
## [2,] -0.006096888 -0.02710041  0.01688493
## [3,] -0.037410958 -0.16954546  0.10143329
fund_cov_mat(x1,x2,l1, sigma1, l2, sigma2, l3, sigma3)
##              [,1]        [,2]        [,3]
## [1,]  0.022853296  0.10391926 -0.06172984
## [2,] -0.006096888 -0.02710041  0.01688493
## [3,] -0.037410958 -0.16954546  0.10143329

Translation+permutation-invariance

X1=x1-rep(x1[1:3],3);X2=x2-rep(x2[1:3],3)
X1=c(0,0,0,X1[7:9],X1[4:6]);X2=c(0,0,0,X2[7:9],X2[4:6])

cov_mat3d(X1+translation_vec1,x2+translation_vec2,l1, sigma1, l2, sigma2, l3, sigma3)
##      [,1]         [,2]         [,3]
## [1,]    0 0.000000e+00 0.000000e+00
## [2,]    0 4.836394e-05 0.000000e+00
## [3,]    0 0.000000e+00 1.809561e-08
cov_mat3d(x1,x2,l1, sigma1, l2, sigma2, l3, sigma3)
##      [,1]         [,2]         [,3]
## [1,]    0 0.000000e+00 0.000000e+00
## [2,]    0 1.011258e-16 0.000000e+00
## [3,]    0 0.000000e+00 6.022644e-23
cov_mat3d_2(X1+translation_vec1,X2+translation_vec2,l1, sigma1, l2, sigma2, l3, sigma3)
##      [,1]         [,2]         [,3]
## [1,]    0 0.000000e+00 0.000000e+00
## [2,]    0 1.404891e-05 0.000000e+00
## [3,]    0 0.000000e+00 3.909064e-09
cov_mat3d_2(x1,x2,l1, sigma1, l2, sigma2, l3, sigma3)
##      [,1]         [,2]         [,3]
## [1,]    0 0.000000e+00 0.000000e+00
## [2,]    0 1.404891e-05 0.000000e+00
## [3,]    0 0.000000e+00 3.909064e-09
cov_mat3d_4(X1+translation_vec1,X2+translation_vec2,l1, sigma1, l2, sigma2, l3, sigma3)
##              [,1]       [,2]         [,3]
## [1,] 1.36624e-190 0.00000000 0.000000e+00
## [2,]  0.00000e+00 0.02348352 0.000000e+00
## [3,]  0.00000e+00 0.00000000 3.866915e-05
cov_mat3d_4(x1,x2,l1, sigma1, l2, sigma2, l3, sigma3)
##              [,1]       [,2]         [,3]
## [1,] 1.36624e-190 0.00000000 0.000000e+00
## [2,]  0.00000e+00 0.02348352 0.000000e+00
## [3,]  0.00000e+00 0.00000000 3.866915e-05
fund_cov_mat(X1+translation_vec1,X2+translation_vec2,l1, sigma1, l2, sigma2, l3, sigma3)
##              [,1]        [,2]        [,3]
## [1,]  0.022853296  0.10391926 -0.06172984
## [2,] -0.006096888 -0.02710041  0.01688493
## [3,] -0.037410958 -0.16954546  0.10143329
fund_cov_mat(x1,x2,l1, sigma1, l2, sigma2, l3, sigma3)
##              [,1]        [,2]        [,3]
## [1,]  0.022853296  0.10391926 -0.06172984
## [2,] -0.006096888 -0.02710041  0.01688493
## [3,] -0.037410958 -0.16954546  0.10143329

Likelihoods

log_likelihood_fund = function(ytr, xtr, l1, sigma1, l2, sigma2, l3, sigma3) {
  if (length(ytr) > 3 && ncol(ytr) == 3) {
    ytr = c(ytr[, 1], ytr[, 2], ytr[, 3])
  }
  
  ntr = ifelse(length(xtr) == 9, 1, nrow(xtr))
  Ktr = cross_fund_cov_mat(xtr, xtr, l1, sigma1, l2, sigma2, l3, sigma3) +
    diag(jit, nrow = 3 * ntr)
  
  ll = -0.5 * t(ytr) %*% inv(Ktr) %*% ytr - 0.5 * log(det(Ktr)) - ntr * log(2 * pi)
  ll = ifelse(is.nan(ll), 10^6, -ll)
  
  return(ll)
}

log_likelihood_standard = function(ytr, xtr, l1, sigma1, l2, sigma2, l3, sigma3) {
  if (length(ytr) > 3 && ncol(ytr) == 3) {
    ytr = c(ytr[, 1], ytr[, 2], ytr[, 3])
  }
  
  ntr = ifelse(length(xtr) == 9, 1, nrow(xtr))
  Ktr = cross_cov_mat3d(xtr, xtr, l1, sigma1, l2, sigma2, l3, sigma3) +
    diag(jit, nrow = 3 * ntr)
  
  ll = -0.5 * sum(ytr * inv(Ktr)%*%ytr) - 0.5 * determinant(Ktr)$modulus[1] - ntr * log(2 * pi)
  ll = ifelse(is.nan(ll), 10^6, -ll)
  
  return(ll)
}

log_likelihood_standard2 = function(ytr, xtr, l1, sigma1, l2, sigma2, l3, sigma3) {
  if (length(ytr) > 3 && ncol(ytr) == 3) {
    ytr = c(ytr[, 1], ytr[, 2], ytr[, 3])
  }
  
  ntr = ifelse(length(xtr) == 9, 1, nrow(xtr))
  Ktr = cross_cov_mat3d_2(xtr, xtr, l1, sigma1, l2, sigma2, l3, sigma3) +
    diag(jit, nrow = 3 * ntr)
  
  ll = -0.5 * sum(ytr * inv(Ktr)%*%ytr) - 0.5 * determinant(Ktr)$modulus[1] - ntr * log(2 * pi)
  ll = ifelse(is.nan(ll), 10^6, -ll)
  
  return(ll)
}

log_likelihood_standard3 = function(ytr, xtr, l1, sigma1, l2, sigma2, l3, sigma3) {
  if (length(ytr) > 3 && ncol(ytr) == 3) {
    ytr = c(ytr[, 1], ytr[, 2], ytr[, 3])
  }
  
  ntr = ifelse(length(xtr) == 9, 1, nrow(xtr))
  Ktr = cross_cov_mat3d_3(xtr, xtr, l1, sigma1, l2, sigma2, l3, sigma3) +
    diag(jit, nrow = 3 * ntr)
  
  ll = -0.5 * sum(ytr * inv(Ktr)%*%ytr) - 0.5 * determinant(Ktr)$modulus[1] - ntr * log(2 * pi)
  ll = ifelse(is.nan(ll), 10^6, -ll)
  
  return(ll)
}
log_likelihood_standard4 = function(ytr, xtr, l1, sigma1, l2, sigma2, l3, sigma3) {
  if (length(ytr) > 3 && ncol(ytr) == 3) {
    ytr = c(ytr[, 1], ytr[, 2], ytr[, 3])
  }
  
  ntr = ifelse(length(xtr) == 9, 1, nrow(xtr))
  Ktr = cross_cov_mat3d_4(xtr, xtr, l1, sigma1, l2, sigma2, l3, sigma3) +
    diag(jit, nrow = 3 * ntr)
  
  ll = -0.5 * sum(ytr * inv(Ktr)%*%ytr) - 0.5 * determinant(Ktr)$modulus[1] - ntr * log(2 * pi)
  ll = ifelse(is.nan(ll), 10^6, -ll)
  
  return(ll)
}

Likelihood gradients

grad_fund = function(xtr, ytr, l1, sigma1, l2, sigma2, l3, sigma3) {
  
  Trace = function(x) {
    sum(diag(x))
  }
  
  norm = function(x) {
    sqrt(sum(x^2))
  }
  
  x1 = x2 = xtr
  K = cross_fund_cov_mat(xtr, xtr, l1, sigma1, l2, sigma2, l3, sigma3) +
    diag(jit, nrow = 3 * nrow(xtr))
  
  size = 3 * ifelse(length(as.matrix(xtr)) == 9, 1, nrow(xtr))
  K_l1 = K_l2 = K_l3 = K_s1 = K_s2 = K_s3 = 
    matrix(0, ncol = size, nrow = size)
  
  inv_K = inv(K)
  
  for (i in 1:(length(as.matrix(x1)) / 9)) {
    for (j in 1:(length(as.matrix(x2)) / 9)) {
      
      x1 = xtr[i, ]
      x2 = xtr[j, ]
      
      if (norm(x1[4:6] - x1[1:3]) >= norm(x1[7:9] - x1[1:3])) {
        x_H_bar_1 = x1[4:6] - x1[1:3]
        x_H_bar_2 = x1[7:9] - x1[1:3]
        r1 = norm(x_H_bar_1)
        r2 = norm(x_H_bar_2)
      } else {
        x_H_bar_1 = x1[7:9] - x1[1:3]
        x_H_bar_2 = x1[4:6] - x1[1:3]
        r1 = norm(x_H_bar_1)
        r2 = norm(x_H_bar_2)
      }
      
      theta1 = atan2(x_H_bar_1[1], x_H_bar_1[2])
      x_H_tilde_1 = c(0, sin(theta1) * x_H_bar_1[1] + cos(theta1) *
                         x_H_bar_1[2], x_H_bar_1[3])
      phi1 = -atan2(x_H_tilde_1[3], x_H_tilde_1[2])
      
      psi_1 = matrix(c(
        cos(theta1), -sin(theta1), 0,
        sin(theta1), cos(theta1), 0,
        0, 0, 1
      ), nrow = 3, byrow = TRUE)
      
      psi_2 = matrix(c(
        1, 0, 0,
        0, cos(phi1), -sin(phi1),
        0, sin(phi1), cos(phi1)
      ), nrow = 3, byrow = TRUE)
      
      x_H_tilde_2 = psi_2 %*% psi_1 %*% x_H_bar_2
      
      phi2 = -atan2(x_H_tilde_2[3], x_H_tilde_2[1])
      psi_3 = matrix(c(
        cos(phi2), 0, -sin(phi2),
        0, 1, 0,
        sin(phi2), 0, cos(phi2)
      ), nrow = 3, byrow = TRUE)
      
      A1 = c(r1, (psi_3 %*% x_H_tilde_2)[1], (psi_3 %*% x_H_tilde_2)[2])
      
      if (norm(x2[4:6] - x2[1:3]) >= norm(x2[7:9] - x2[1:3])) {
        x_H2_bar_1 = x2[4:6] - x2[1:3]
        x_H2_bar_2 = x2[7:9] - x2[1:3]
        r1_2 = norm(x_H2_bar_1)
        r2_2 = norm(x_H2_bar_2)
      } else {
        x_H2_bar_1 = x2[7:9] - x2[1:3]
        x_H2_bar_2 = x2[4:6] - x2[1:3]
        r1_2 = norm(x_H2_bar_1)
        r2_2 = norm(x_H2_bar_2)
      }
      
      theta1_2 = atan2(x_H2_bar_1[1], x_H2_bar_1[2])
      x_H2_tilde_1 = c(0, sin(theta1_2) * x_H2_bar_1[1] + 
                          cos(theta1_2) * x_H2_bar_1[2], x_H2_bar_1[3])
      phi1_2 = -atan2(x_H2_tilde_1[3], x_H2_tilde_1[2])
      
      psi_1_2 = matrix(c(
        cos(theta1_2), -sin(theta1_2), 0,
        sin(theta1_2), cos(theta1_2), 0,
        0, 0, 1
      ), nrow = 3, byrow = TRUE)
      
      psi_2_2 = matrix(c(
        1, 0, 0,
        0, cos(phi1_2), -sin(phi1_2),
        0, sin(phi1_2), cos(phi1_2)
      ), nrow = 3, byrow = TRUE)
      
      x_H2_tilde_2 = psi_2_2 %*% psi_1_2 %*% x_H2_bar_2
      
      phi2_2 = -atan2(x_H2_tilde_2[3], x_H2_tilde_2[1])
      psi_3_2 = matrix(c(
        cos(phi2_2), 0, -sin(phi2_2),
        0, 1, 0,
        sin(phi2_2), 0, cos(phi2_2)
      ), nrow = 3, byrow = TRUE)
      
      A2 = c(r1_2, (psi_3_2 %*% x_H2_tilde_2)[1], (psi_3_2 %*% x_H2_tilde_2)[2])
      
      cov1 = t(psi_3 %*% psi_2 %*% psi_1) %*% diag(c(
        sigma1^2 * exp(-norm(A1 - A2)^2 / (2 * l1^2)) / (l1^3) * norm(A1 - A2)^2, 
        0, 
        0
      )) %*% psi_3_2 %*% psi_2_2 %*% psi_1_2
      
      cov2 = t(psi_3 %*% psi_2 %*% psi_1) %*% diag(c(
        0, 
        sigma2^2 * exp(-norm(A1 - A2)^2 / (2 * l2^2)) / (l2^3) * norm(A1 - A2)^2, 
        0
      )) %*% psi_3_2 %*% psi_2_2 %*% psi_1_2
      
      cov3 = t(psi_3 %*% psi_2 %*% psi_1) %*% diag(c(
        0, 
        0, 
        sigma3^2 * exp(-norm(A1 - A2)^2 / (2 * l3^2)) / (l3^3) * norm(A1 - A2)^2
      )) %*% psi_3_2 %*% psi_2_2 %*% psi_1_2
      
      cov4 = t(psi_3 %*% psi_2 %*% psi_1) %*% diag(c(
        2 * sigma1 * exp(-norm(A1 - A2)^2 / (2 * l1^2)), 
        0, 
        0
      )) %*% psi_3_2 %*% psi_2_2 %*% psi_1_2
      
      cov5 = t(psi_3 %*% psi_2 %*% psi_1) %*% diag(c(
        0, 
        2 * sigma2 * exp(-norm(A1 - A2)^2 / (2 * l2^2)), 
        0
      )) %*% psi_3_2 %*% psi_2_2 %*% psi_1_2
      
      cov6 = t(psi_3 %*% psi_2 %*% psi_1) %*% diag(c(
        0, 
        0, 
        2 * sigma3 * exp(-norm(A1 - A2)^2 / (2 * l3^2))
      )) %*% psi_3_2 %*% psi_2_2 %*% psi_1_2
      
      x1=x2=xtr
      
      K_l1[i, j] = cov1[1,1]
      K_l1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[2,2]
      
      K_l1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[3,3]
      
      K_l1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov1[2,1]
      K_l1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov1[3,1]
      
      K_l1[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[1,2]
      K_l1[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[1,3]
      
      K_l1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[3,2]
      
      K_l1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[2,3]
      
      
      
      K_l2[i, j] = cov2[1,1]
      K_l2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[2,2]
      
      K_l2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[3,3]
      
      K_l2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov2[2,1]
      K_l2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov2[3,1]
      
      K_l2[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[1,2]
      K_l2[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[1,3]
      
      K_l2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[3,2]
      
      K_l2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[2,3]
      
      
      K_l3[i, j] = cov3[1,1]
      K_l3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[2,2]
      
      K_l3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[3,3]
      
      K_l3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov3[2,1]
      K_l3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov3[3,1]
      
      K_l3[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[1,2]
      K_l3[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[1,3]
      
      K_l3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[3,2]
      
      K_l3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[2,3]
      
      
      
      K_s1[i, j] = cov4[1,1]
      K_s1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[2,2]
      
      K_s1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[3,3]
      
      K_s1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov4[2,1]
      K_s1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov4[3,1]
      
      K_s1[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[1,2]
      K_s1[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[1,3]
      
      K_s1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[3,2]
      
      K_s1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[2,3]
      
      
      
      K_s2[i, j] = cov5[1,1]
      K_s2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[2,2]
      
      K_s2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[3,3]
      
      K_s2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov5[2,1]
      K_s2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov5[3,1]
      
      K_s2[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[1,2]
      K_s2[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[1,3]
      
      K_s2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[3,2]
      
      K_s2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[2,3]
      
      
      
      K_s3[i, j] = cov6[1,1]
      K_s3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[2,2]
      
      K_s3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[3,3]
      
      K_s3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov6[2,1]
      K_s3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov6[3,1]
      
      K_s3[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[1,2]
      K_s3[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[1,3]
      
      K_s3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[3,2]
      
      K_s3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[2,3]
      
    }
  }
 
  y_hat = c(ytr[, 1], ytr[, 2], ytr[, 3])
  grad = 0.5 * c(
     -t(y_hat) %*% inv_K %*% K_l1 %*% inv_K %*% y_hat + Trace(inv_K %*% K_l1),
     -t(y_hat) %*% inv_K %*% K_s1 %*% inv_K %*% y_hat + Trace(inv_K %*% K_s1),
     -t(y_hat) %*% inv_K %*% K_l2 %*% inv_K %*% y_hat + Trace(inv_K %*% K_l2),
     -t(y_hat) %*% inv_K %*% K_s2 %*% inv_K %*% y_hat + Trace(inv_K %*% K_s2),
     -t(y_hat) %*% inv_K %*% K_l3 %*% inv_K %*% y_hat + Trace(inv_K %*% K_l3),
     -t(y_hat) %*% inv_K %*% K_s3 %*% inv_K %*% y_hat + Trace(inv_K %*% K_s3)
     )

  return(grad)

}

grad_standard=function(xtr,ytr,l1,sigma1,l2,sigma2,l3,sigma3){
  Trace=function(x){sum(diag(x))}
  x1=x2=xtr
  
  norm=function(x){sum(x^2)^.5}
  
  K=cross_cov_mat3d(xtr,xtr,l1,sigma1,l2,sigma2,l3,sigma3)+
    diag(jit,nrow=3*nrow(xtr))
  
  
  K_l1 =  K_l2 = K_l3 = K_s1 = K_s2 = K_s3 =
    matrix(0, ncol =3 * ifelse(length(as.matrix(xtr)) == 9, 1, nrow(xtr)),
           nrow = 3 * ifelse(length(as.matrix(xtr)) == 9, 1, nrow(xtr)))
  
  inv_K=inv(K)
  
  
  for(i in 1:(length(as.matrix(x1))/9)){
    for (j in 1:(length(as.matrix(x2))/9)){
      x1=x1[i,];x2=x2[j,]
      cov1= cov_mat3d(x1,x2,l1,sigma1,l2,0 ,l3,0)*(norm(x1-x2))^2/(l1^3)
      
      cov2= cov_mat3d(x1,x2,l1,0,l2,sigma2 ,l3,0)*(norm(x1-x2))^2/(l2^3)
      
      cov3= cov_mat3d(x1,x2,l1,0,l2,0 ,l3,sigma3)*(norm(x1-x2))^2/(l3^3)
      
      cov4= cov_mat3d(x1,x2,l1,1,l2,0,l3,0)*2*sigma1
      
      
      cov5= cov_mat3d(x1,x2,l1,0,l2,1,l3,0)*2*sigma2
      
      cov6= cov_mat3d(x1,x2,l1,0,l2,0,l3,1)*2*sigma3
      
      x1=x2=xtr
      
      K_l1[i, j] = cov1[1,1]
      K_l1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[2,2]
      
      K_l1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[3,3]
      
      K_l1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov1[2,1]
      K_l1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov1[3,1]
      
      K_l1[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[1,2]
      K_l1[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[1,3]
      
      K_l1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[3,2]
      
      K_l1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[2,3]
      
      
      
      K_l2[i, j] = cov2[1,1]
      K_l2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[2,2]
      
      K_l2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[3,3]
      
      K_l2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov2[2,1]
      K_l2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov2[3,1]
      
      K_l2[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[1,2]
      K_l2[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[1,3]
      
      K_l2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[3,2]
      
      K_l2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[2,3]
      
      
      K_l3[i, j] = cov3[1,1]
      K_l3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[2,2]
      
      K_l3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[3,3]
      
      K_l3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov3[2,1]
      K_l3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov3[3,1]
      
      K_l3[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[1,2]
      K_l3[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[1,3]
      
      K_l3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[3,2]
      
      K_l3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[2,3]
      
      
      
      K_s1[i, j] = cov4[1,1]
      K_s1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[2,2]
      
      K_s1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[3,3]
      
      K_s1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov4[2,1]
      K_s1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov4[3,1]
      
      K_s1[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[1,2]
      K_s1[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[1,3]
      
      K_s1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[3,2]
      
      K_s1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[2,3]
      
      
      
      K_s2[i, j] = cov5[1,1]
      K_s2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[2,2]
      
      K_s2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[3,3]
      
      K_s2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov5[2,1]
      K_s2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov5[3,1]
      
      K_s2[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[1,2]
      K_s2[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[1,3]
      
      K_s2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[3,2]
      
      K_s2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[2,3]
      
      
      
      K_s3[i, j] = cov6[1,1]
      K_s3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[2,2]
      
      K_s3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[3,3]
      
      K_s3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov6[2,1]
      K_s3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov6[3,1]
      
      K_s3[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[1,2]
      K_s3[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[1,3]
      
      K_s3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[3,2]
      
      K_s3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[2,3]
      
      
    }
  }
   y_hat = c(ytr[, 1], ytr[, 2], ytr[, 3])
   grad = 0.5 * c(
     -t(y_hat) %*% inv_K %*% K_l1 %*% inv_K %*% y_hat + Trace(inv_K %*% K_l1),
     -t(y_hat) %*% inv_K %*% K_s1 %*% inv_K %*% y_hat + Trace(inv_K %*% K_s1),
     -t(y_hat) %*% inv_K %*% K_l2 %*% inv_K %*% y_hat + Trace(inv_K %*% K_l2),
     -t(y_hat) %*% inv_K %*% K_s2 %*% inv_K %*% y_hat + Trace(inv_K %*% K_s2),
     -t(y_hat) %*% inv_K %*% K_l3 %*% inv_K %*% y_hat + Trace(inv_K %*% K_l3),
     -t(y_hat) %*% inv_K %*% K_s3 %*% inv_K %*% y_hat + Trace(inv_K %*% K_s3)
     )

  
  return(grad)
  
  
}


grad_standard2=function(xtr,ytr,l1,sigma1,l2,sigma2,l3,sigma3){
  Trace=function(x){sum(diag(x))}
  x1=x2=xtr
  
  norm=function(x){sum(x^2)^.5}
  
  K=cross_cov_mat3d_2(xtr,xtr,l1,sigma1,l2,sigma2,l3,sigma3)+
    diag(jit,nrow=3*nrow(xtr))
  
  
  K_l1 =  K_l2 = K_l3 = K_s1 = K_s2 = K_s3 =
    matrix(0, ncol =3 * ifelse(length(as.matrix(xtr)) == 9, 1, nrow(xtr)),
           nrow = 3 * ifelse(length(as.matrix(xtr)) == 9, 1, nrow(xtr)))
  
  inv_K=inv(K)
  
  
  for(i in 1:(length(as.matrix(x1))/9)){
    for (j in 1:(length(as.matrix(x2))/9)){
      x1=x1[i,]-rep(x1[i,1:3],3);x2=x2[j,]-rep(x2[j,1:3],3)
      
      cov1= cov_mat3d_2(x1,x2,l1,sigma1,l2,0 ,l3,0)*(norm(x1-x2))^2/(l1^3)
      
      cov2= cov_mat3d_2(x1,x2,l1,0,l2,sigma2 ,l3,0)*(norm(x1-x2))^2/(l2^3)
      
      cov3= cov_mat3d_2(x1,x2,l1,0,l2,0 ,l3,sigma3)*(norm(x1-x2))^2/(l3^3)
      
      cov4= cov_mat3d_2(x1,x2,l1,1,l2,0,l3,0)*2*sigma1
      
      cov5= cov_mat3d_2(x1,x2,l1,0,l2,1,l3,0)*2*sigma2
      
      cov6= cov_mat3d_2(x1,x2,l1,0,l2,0,l3,1)*2*sigma3
      
      x1=x2=xtr
      
      K_l1[i, j] = cov1[1,1]
      K_l1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[2,2]
      
      K_l1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[3,3]
      
      K_l1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov1[2,1]
      K_l1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov1[3,1]
      
      K_l1[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[1,2]
      K_l1[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[1,3]
      
      K_l1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[3,2]
      
      K_l1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[2,3]
      
      
      
      K_l2[i, j] = cov2[1,1]
      K_l2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[2,2]
      
      K_l2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[3,3]
      
      K_l2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov2[2,1]
      K_l2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov2[3,1]
      
      K_l2[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[1,2]
      K_l2[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[1,3]
      
      K_l2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[3,2]
      
      K_l2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[2,3]
      
      
      K_l3[i, j] = cov3[1,1]
      K_l3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[2,2]
      
      K_l3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[3,3]
      
      K_l3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov3[2,1]
      K_l3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov3[3,1]
      
      K_l3[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[1,2]
      K_l3[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[1,3]
      
      K_l3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[3,2]
      
      K_l3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[2,3]
      
      
      
      K_s1[i, j] = cov4[1,1]
      K_s1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[2,2]
      
      K_s1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[3,3]
      
      K_s1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov4[2,1]
      K_s1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov4[3,1]
      
      K_s1[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[1,2]
      K_s1[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[1,3]
      
      K_s1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[3,2]
      
      K_s1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[2,3]
      
      
      
      K_s2[i, j] = cov5[1,1]
      K_s2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[2,2]
      
      K_s2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[3,3]
      
      K_s2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov5[2,1]
      K_s2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov5[3,1]
      
      K_s2[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[1,2]
      K_s2[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[1,3]
      
      K_s2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[3,2]
      
      K_s2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[2,3]
      
      
      
      K_s3[i, j] = cov6[1,1]
      K_s3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[2,2]
      
      K_s3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[3,3]
      
      K_s3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov6[2,1]
      K_s3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov6[3,1]
      
      K_s3[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[1,2]
      K_s3[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[1,3]
      
      K_s3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[3,2]
      
      K_s3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[2,3]
      
      
    }
  }
   y_hat = c(ytr[, 1], ytr[, 2], ytr[, 3])
   grad = 0.5 * c(
     -t(y_hat) %*% inv_K %*% K_l1 %*% inv_K %*% y_hat + Trace(inv_K %*% K_l1),
     -t(y_hat) %*% inv_K %*% K_s1 %*% inv_K %*% y_hat + Trace(inv_K %*% K_s1),
     -t(y_hat) %*% inv_K %*% K_l2 %*% inv_K %*% y_hat + Trace(inv_K %*% K_l2),
     -t(y_hat) %*% inv_K %*% K_s2 %*% inv_K %*% y_hat + Trace(inv_K %*% K_s2),
     -t(y_hat) %*% inv_K %*% K_l3 %*% inv_K %*% y_hat + Trace(inv_K %*% K_l3),
     -t(y_hat) %*% inv_K %*% K_s3 %*% inv_K %*% y_hat + Trace(inv_K %*% K_s3)
     )
  return(grad)
  
  
}

grad_standard3=function(xtr,ytr,l1,sigma1,l2,sigma2,l3,sigma3){
  Trace=function(x){sum(diag(x))}
  x1=x2=xtr
  
  norm=function(x){sum(x^2)^.5}
  
  K=cross_cov_mat3d_3(xtr,xtr,l1,sigma1,l2,sigma2,l3,sigma3)+
    diag(jit,nrow=3*nrow(xtr))
  
  
  K_l1 =  K_l2 = K_l3 = K_s1 = K_s2 = K_s3 =
    matrix(0, ncol =3 * ifelse(length(as.matrix(xtr)) == 9, 1, nrow(xtr)),
           nrow = 3 * ifelse(length(as.matrix(xtr)) == 9, 1, nrow(xtr)))
  
  inv_K=inv(K)
  
  
  for(i in 1:(length(as.matrix(x1))/9)){
    for (j in 1:(length(as.matrix(x2))/9)){
      X1=x1[i,]-rep(x1[i,1:3],3);X2=x2[j,]-rep(x2[j,1:3],3)
      
      if(norm(X1[4:6])<norm(X1[7:9])){
        X1=c(X1[1:3],X1[7:9],X1[4:6])
      }
      if(norm(X2[4:6])<norm(X2[7:9])){
        X2=c(X2[1:3],X2[7:9],X2[4:6])
      }
      x1=x1[i,];x2=x2[j,]
      
      cov1= cov_mat3d_3(x1,x2,l1,sigma1,l2,0 ,l3,0)*(norm(X1-X2))^2/(l1^3)
      
      cov2= cov_mat3d_3(x1,x2,l1,0,l2,sigma2 ,l3,0)*(norm(X1-X2))^2/(l2^3)
      
      cov3= cov_mat3d_3(x1,x2,l1,0,l2,0 ,l3,sigma3)*(norm(X1-X2))^2/(l3^3)
      
      cov4= cov_mat3d_3(x1,x2,l1,1,l2,0,l3,0)*2*sigma1
      
      cov5= cov_mat3d_3(x1,x2,l1,0,l2,1,l3,0)*2*sigma2
      
      cov6= cov_mat3d_3(x1,x2,l1,0,l2,0,l3,1)*2*sigma3
      
      x1=x2=xtr
      
      K_l1[i, j] = cov1[1,1]
      K_l1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[2,2]
      
      K_l1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[3,3]
      
      K_l1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov1[2,1]
      K_l1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov1[3,1]
      
      K_l1[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[1,2]
      K_l1[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[1,3]
      
      K_l1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[3,2]
      
      K_l1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[2,3]
      
      
      
      K_l2[i, j] = cov2[1,1]
      K_l2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[2,2]
      
      K_l2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[3,3]
      
      K_l2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov2[2,1]
      K_l2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov2[3,1]
      
      K_l2[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[1,2]
      K_l2[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[1,3]
      
      K_l2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[3,2]
      
      K_l2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[2,3]
      
      
      K_l3[i, j] = cov3[1,1]
      K_l3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[2,2]
      
      K_l3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[3,3]
      
      K_l3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov3[2,1]
      K_l3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov3[3,1]
      
      K_l3[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[1,2]
      K_l3[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[1,3]
      
      K_l3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[3,2]
      
      K_l3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[2,3]
      
      
      
      K_s1[i, j] = cov4[1,1]
      K_s1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[2,2]
      
      K_s1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[3,3]
      
      K_s1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov4[2,1]
      K_s1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov4[3,1]
      
      K_s1[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[1,2]
      K_s1[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[1,3]
      
      K_s1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[3,2]
      
      K_s1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[2,3]
      
      
      
      K_s2[i, j] = cov5[1,1]
      K_s2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[2,2]
      
      K_s2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[3,3]
      
      K_s2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov5[2,1]
      K_s2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov5[3,1]
      
      K_s2[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[1,2]
      K_s2[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[1,3]
      
      K_s2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[3,2]
      
      K_s2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[2,3]
      
      
      
      K_s3[i, j] = cov6[1,1]
      K_s3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[2,2]
      
      K_s3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[3,3]
      
      K_s3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov6[2,1]
      K_s3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov6[3,1]
      
      K_s3[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[1,2]
      K_s3[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[1,3]
      
      K_s3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[3,2]
      
      K_s3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[2,3]
      
      
    }
  }
  y_hat = c(ytr[, 1], ytr[, 2], ytr[, 3])
   grad = 0.5 * c(
     -t(y_hat) %*% inv_K %*% K_l1 %*% inv_K %*% y_hat + Trace(inv_K %*% K_l1),
     -t(y_hat) %*% inv_K %*% K_s1 %*% inv_K %*% y_hat + Trace(inv_K %*% K_s1),
     -t(y_hat) %*% inv_K %*% K_l2 %*% inv_K %*% y_hat + Trace(inv_K %*% K_l2),
     -t(y_hat) %*% inv_K %*% K_s2 %*% inv_K %*% y_hat + Trace(inv_K %*% K_s2),
     -t(y_hat) %*% inv_K %*% K_l3 %*% inv_K %*% y_hat + Trace(inv_K %*% K_l3),
     -t(y_hat) %*% inv_K %*% K_s3 %*% inv_K %*% y_hat + Trace(inv_K %*% K_s3)
     )
  
  return(grad)
  
  
}


grad_standard4=function(xtr,ytr,l1,sigma1,l2,sigma2,l3,sigma3){
  Trace=function(x){sum(diag(x))}
  x1=x2=xtr
  
  norm=function(x){sum(x^2)^.5}
  
  K=cross_cov_mat3d_4(xtr,xtr,l1,sigma1,l2,sigma2,l3,sigma3)+
    diag(jit,nrow=3*nrow(xtr))
  
  
  K_l1 =  K_l2 = K_l3 = K_s1 = K_s2 = K_s3 =
    matrix(0, ncol =3 * ifelse(length(as.matrix(xtr)) == 9, 1, nrow(xtr)),
           nrow = 3 * ifelse(length(as.matrix(xtr)) == 9, 1, nrow(xtr)))
  
  inv_K=inv(K)
  
  
  for(i in 1:(length(as.matrix(x1))/9)){
    for (j in 1:(length(as.matrix(x2))/9)){
      X1=x1[i,]-rep(x1[i,1:3],3);X2=x2[j,]-rep(x2[j,1:3],3)
      
      if(norm(X1[4:6])<norm(X1[7:9])){
        X1=c(0,0,0,X1[7:9],X1[4:6])
      }
      if(norm(X2[4:6])<norm(X2[7:9])){
        X2=c(0,0,0,X2[7:9],X2[4:6])
      }
      x1=x1[i,];x2=x2[j,]
      
      cov1= cov_mat3d_4(x1,x2,l1,sigma1,l2,0 ,l3,0)*(norm(X1-X2))^2/(l1^3)
      
      cov2= cov_mat3d_4(x1,x2,l1,0,l2,sigma2 ,l3,0)*(norm(X1-X2))^2/(l2^3)
      
      cov3= cov_mat3d_4(x1,x2,l1,0,l2,0 ,l3,sigma3)*(norm(X1-X2))^2/(l3^3)
      
      cov4= cov_mat3d_4(x1,x2,l1,1,l2,0,l3,0)*2*sigma1
      
      cov5= cov_mat3d_4(x1,x2,l1,0,l2,1,l3,0)*2*sigma2
      
      cov6= cov_mat3d_4(x1,x2,l1,0,l2,0,l3,1)*2*sigma3
      
      x1=x2=xtr
      
      K_l1[i, j] = cov1[1,1]
      K_l1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[2,2]
      
      K_l1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[3,3]
      
      K_l1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov1[2,1]
      K_l1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov1[3,1]
      
      K_l1[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[1,2]
      K_l1[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[1,3]
      
      K_l1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[3,2]
      
      K_l1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov1[2,3]
      
      
      
      K_l2[i, j] = cov2[1,1]
      K_l2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[2,2]
      
      K_l2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[3,3]
      
      K_l2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov2[2,1]
      K_l2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov2[3,1]
      
      K_l2[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[1,2]
      K_l2[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[1,3]
      
      K_l2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[3,2]
      
      K_l2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov2[2,3]
      
      
      K_l3[i, j] = cov3[1,1]
      K_l3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[2,2]
      
      K_l3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[3,3]
      
      K_l3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov3[2,1]
      K_l3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov3[3,1]
      
      K_l3[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[1,2]
      K_l3[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[1,3]
      
      K_l3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[3,2]
      
      K_l3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
           2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov3[2,3]
      
      
      
      K_s1[i, j] = cov4[1,1]
      K_s1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[2,2]
      
      K_s1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[3,3]
      
      K_s1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov4[2,1]
      K_s1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov4[3,1]
      
      K_s1[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[1,2]
      K_s1[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[1,3]
      
      K_s1[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[3,2]
      
      K_s1[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov4[2,3]
      
      
      
      K_s2[i, j] = cov5[1,1]
      K_s2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[2,2]
      
      K_s2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[3,3]
      
      K_s2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov5[2,1]
      K_s2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov5[3,1]
      
      K_s2[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[1,2]
      K_s2[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[1,3]
      
      K_s2[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[3,2]
      
      K_s2[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov5[2,3]
      
      
      
      K_s3[i, j] = cov6[1,1]
      K_s3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[2,2]
      
      K_s3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[3,3]
      
      K_s3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov6[2,1]
      K_s3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i, j] = cov6[3,1]
      
      K_s3[i,ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[1,2]
      K_s3[i,2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[1,3]
      
      K_s3[2*ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[3,2]
      
      K_s3[ifelse(length(as.matrix(x1))==9,1,nrow(x1)) + i,
               2*ifelse(length(as.matrix(x2))==9,1,nrow(x2)) + j] = cov6[2,3]
      
      
    }
  }
  y_hat = c(ytr[, 1], ytr[, 2], ytr[, 3])
   grad = 0.5 * c(
     -t(y_hat) %*% inv_K %*% K_l1 %*% inv_K %*% y_hat + Trace(inv_K %*% K_l1),
     -t(y_hat) %*% inv_K %*% K_s1 %*% inv_K %*% y_hat + Trace(inv_K %*% K_s1),
     -t(y_hat) %*% inv_K %*% K_l2 %*% inv_K %*% y_hat + Trace(inv_K %*% K_l2),
     -t(y_hat) %*% inv_K %*% K_s2 %*% inv_K %*% y_hat + Trace(inv_K %*% K_s2),
     -t(y_hat) %*% inv_K %*% K_l3 %*% inv_K %*% y_hat + Trace(inv_K %*% K_l3),
     -t(y_hat) %*% inv_K %*% K_s3 %*% inv_K %*% y_hat + Trace(inv_K %*% K_s3)
     )
  
  return(grad)
  
  
}

Gradient check

nloptr(runif(6),function(x) {log_likelihood_fund(
                   Y[1:10,], X[1:10,],x[1],x[2],x[3],x[4],x[5],x[6])},
               function(x) {grad_fund(
                   X[1:10,],Y[1:10,],x[1],x[2],x[3],x[4],x[5],x[6])},
               opts=list(algorithm="NLOPT_LD_LBFGS",check_derivatives=TRUE))
## Checking gradients of objective function.
## Derivative checker results: 5 error(s) detected.
## 
## * eval_grad_f[ 1 ] = -1.649459e+01 ~ -1.648628e+01   [5.039492e-04]
## * eval_grad_f[ 2 ] =  6.234175e+00 ~  6.233465e+00   [1.138179e-04]
## * eval_grad_f[ 3 ] = -3.235375e+01 ~ -3.234432e+01   [2.912833e-04]
## * eval_grad_f[ 4 ] = -2.072214e+00 ~ -2.077481e+00   [2.535336e-03]
##   eval_grad_f[ 5 ] = -6.990195e+01 ~ -6.989578e+01   [8.815438e-05]
## * eval_grad_f[ 6 ] =  1.023361e+01 ~  1.024320e+01   [9.358791e-04]
## 
## Call:
## nloptr(x0 = runif(6), eval_f = function(x) {
##     log_likelihood_fund(Y[1:10, ], X[1:10, ], x[1], x[2], x[3], 
##         x[4], x[5], x[6])}, eval_grad_f = function(x) {
##     grad_fund(X[1:10, ], Y[1:10, ], x[1], x[2], x[3], x[4], x[5],         x[6])
## }, opts = list(algorithm = "NLOPT_LD_LBFGS", check_derivatives = TRUE))
## 
## 
## Minimization using NLopt version 2.7.1 
## 
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because 
## xtol_rel or xtol_abs (above) was reached. )
## 
## Number of Iterations....: 20 
## Termination conditions:  relative x-tolerance = 1e-04 (DEFAULT) 
## Number of inequality constraints:  0 
## Number of equality constraints:    0 
## Optimal value of objective function:  -144.454501552487 
## Optimal value of controls: 17.07131 -3.708934 30.97093 8.842872 71.63975 0.0003286632
nloptr(runif(6),function(x) {log_likelihood_standard(
                   Y[1:10,], X[1:10,],x[1],x[2],x[3],x[4],x[5],x[6])},
               function(x) {grad_standard(
                   X[1:10,],Y[1:10,],x[1],x[2],x[3],x[4],x[5],x[6])},
               opts=list(algorithm="NLOPT_LD_LBFGS",check_derivatives=TRUE))
## Checking gradients of objective function.
## Derivative checker results: 0 error(s) detected.
## 
##   eval_grad_f[ 1 ] =  1.312511e-01 ~  1.312509e-01   [ 1.569079e-06]
##   eval_grad_f[ 2 ] = -2.791847e+01 ~ -2.791847e+01   [ 9.508031e-08]
##   eval_grad_f[ 3 ] = -1.532505e-01 ~ -1.532507e-01   [ 1.452778e-06]
##   eval_grad_f[ 4 ] = -3.402721e+01 ~ -3.402721e+01   [ 1.420291e-07]
##   eval_grad_f[ 5 ] = -3.745238e-25 ~  0.000000e+00   [-3.745238e-25]
##   eval_grad_f[ 6 ] = -8.830342e+01 ~ -8.830341e+01   [ 8.573922e-08]
## 
## Call:
## nloptr(x0 = runif(6), eval_f = function(x) {
##     log_likelihood_standard(Y[1:10, ], X[1:10, ], x[1], x[2], 
##         x[3], x[4], x[5], x[6])}, eval_grad_f = function(x) {
##     grad_standard(X[1:10, ], Y[1:10, ], x[1], x[2], x[3], x[4], 
##         x[5], x[6])
## }, opts = list(algorithm = "NLOPT_LD_LBFGS", check_derivatives = TRUE))
## 
## 
## Minimization using NLopt version 2.7.1 
## 
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because 
## xtol_rel or xtol_abs (above) was reached. )
## 
## Number of Iterations....: 30 
## Termination conditions:  relative x-tolerance = 1e-04 (DEFAULT) 
## Number of inequality constraints:  0 
## Number of equality constraints:    0 
## Optimal value of objective function:  13.3666156703162 
## Optimal value of controls: 0.4644953 0.5891613 1.628843 0.4051168 0.2872373 0.5685002
nloptr(runif(6),function(x) {log_likelihood_standard2(
                   Y[1:10,], X[1:10,],x[1],x[2],x[3],x[4],x[5],x[6])},
               function(x) {grad_standard2(
                   X[1:10,],Y[1:10,],x[1],x[2],x[3],x[4],x[5],x[6])},
               opts=list(algorithm="NLOPT_LD_LBFGS",check_derivatives=TRUE))
## Checking gradients of objective function.
## Derivative checker results: 1 error(s) detected.
## 
## * eval_grad_f[ 1 ] = -1.377056e-04 ~ -1.378059e-04   [ 7.280097e-04]
##   eval_grad_f[ 2 ] =  5.243801e+00 ~  5.243802e+00   [ 5.316595e-08]
##   eval_grad_f[ 3 ] = -2.590131e-34 ~  0.000000e+00   [-2.590131e-34]
##   eval_grad_f[ 4 ] = -2.593984e+02 ~ -2.593983e+02   [ 1.479640e-07]
##   eval_grad_f[ 5 ] = -4.260701e-01 ~ -4.260707e-01   [ 1.420085e-06]
##   eval_grad_f[ 6 ] =  6.332940e+00 ~  6.332940e+00   [ 8.072441e-08]
## 
## Call:
## nloptr(x0 = runif(6), eval_f = function(x) {
##     log_likelihood_standard2(Y[1:10, ], X[1:10, ], x[1], x[2], 
##         x[3], x[4], x[5], x[6])}, eval_grad_f = function(x) {
##     grad_standard2(X[1:10, ], Y[1:10, ], x[1], x[2], x[3], x[4], 
##         x[5], x[6])
## }, opts = list(algorithm = "NLOPT_LD_LBFGS", check_derivatives = TRUE))
## 
## 
## Minimization using NLopt version 2.7.1 
## 
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because 
## xtol_rel or xtol_abs (above) was reached. )
## 
## Number of Iterations....: 41 
## Termination conditions:  relative x-tolerance = 1e-04 (DEFAULT) 
## Number of inequality constraints:  0 
## Number of equality constraints:    0 
## Optimal value of objective function:  8.6070034652794 
## Optimal value of controls: 0.2646216 0.589161 0.09702358 0.4082305 4.095346 0.6890581
nloptr(runif(6),function(x) {log_likelihood_standard3(
                   Y[1:10,], X[1:10,],x[1],x[2],x[3],x[4],x[5],x[6])},
               function(x) {grad_standard3(
                   X[1:10,],Y[1:10,],x[1],x[2],x[3],x[4],x[5],x[6])},
               opts=list(algorithm="NLOPT_LD_LBFGS",check_derivatives=TRUE))
## Checking gradients of objective function.
## Derivative checker results: 0 error(s) detected.
## 
##   eval_grad_f[ 1 ] = -7.145480e-20 ~  0.000000e+00   [-7.145480e-20]
##   eval_grad_f[ 2 ] = -2.477990e+02 ~ -2.477990e+02   [ 1.119019e-07]
##   eval_grad_f[ 3 ] = -9.582489e-12 ~  0.000000e+00   [-9.582489e-12]
##   eval_grad_f[ 4 ] =  8.542430e+00 ~  8.542431e+00   [ 1.115974e-07]
##   eval_grad_f[ 5 ] = -1.457783e-15 ~  0.000000e+00   [-1.457783e-15]
##   eval_grad_f[ 6 ] = -3.133496e+02 ~ -3.133496e+02   [ 1.175936e-07]
## 
## Call:
## nloptr(x0 = runif(6), eval_f = function(x) {
##     log_likelihood_standard3(Y[1:10, ], X[1:10, ], x[1], x[2], 
##         x[3], x[4], x[5], x[6])}, eval_grad_f = function(x) {
##     grad_standard3(X[1:10, ], Y[1:10, ], x[1], x[2], x[3], x[4], 
##         x[5], x[6])
## }, opts = list(algorithm = "NLOPT_LD_LBFGS", check_derivatives = TRUE))
## 
## 
## Minimization using NLopt version 2.7.1 
## 
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because 
## xtol_rel or xtol_abs (above) was reached. )
## 
## Number of Iterations....: 39 
## Termination conditions:  relative x-tolerance = 1e-04 (DEFAULT) 
## Number of inequality constraints:  0 
## Number of equality constraints:    0 
## Optimal value of objective function:  13.4815076758917 
## Optimal value of controls: 0.2864246 0.5891614 0.3937582 0.4082304 0.3143971 0.5685001
nloptr(runif(6),function(x) {log_likelihood_standard4(
                   Y[1:10,], X[1:10,],x[1],x[2],x[3],x[4],x[5],x[6])},
               function(x) {grad_standard4(
                   X[1:10,],Y[1:10,],x[1],x[2],x[3],x[4],x[5],x[6])},
               opts=list(algorithm="NLOPT_LD_LBFGS",check_derivatives=TRUE))
## Checking gradients of objective function.
## Derivative checker results: 0 error(s) detected.
## 
##   eval_grad_f[ 1 ] = -2.758516e+02 ~ -2.758517e+02   [2.526900e-08]
##   eval_grad_f[ 2 ] = -2.420512e+04 ~ -2.420511e+04   [4.941483e-07]
##   eval_grad_f[ 3 ] = -3.778862e+00 ~ -3.778870e+00   [2.106298e-06]
##   eval_grad_f[ 4 ] =  8.901419e+00 ~  8.901398e+00   [2.439532e-06]
##   eval_grad_f[ 5 ] = -4.146362e+00 ~ -4.146378e+00   [3.652060e-06]
##   eval_grad_f[ 6 ] =  8.068970e+00 ~  8.068970e+00   [3.791312e-08]
## 
## Call:
## nloptr(x0 = runif(6), eval_f = function(x) {
##     log_likelihood_standard4(Y[1:10, ], X[1:10, ], x[1], x[2], 
##         x[3], x[4], x[5], x[6])}, eval_grad_f = function(x) {
##     grad_standard4(X[1:10, ], Y[1:10, ], x[1], x[2], x[3], x[4], 
##         x[5], x[6])
## }, opts = list(algorithm = "NLOPT_LD_LBFGS", check_derivatives = TRUE))
## 
## 
## Minimization using NLopt version 2.7.1 
## 
## NLopt solver status: 4 ( NLOPT_XTOL_REACHED: Optimization stopped because 
## xtol_rel or xtol_abs (above) was reached. )
## 
## Number of Iterations....: 21 
## Termination conditions:  relative x-tolerance = 1e-04 (DEFAULT) 
## Number of inequality constraints:  0 
## Number of equality constraints:    0 
## Optimal value of objective function:  37.7676494326641 
## Optimal value of controls: 279.6305 24205.13 23.60211 -4.137978 7.329673 1.157912

Model fitting

if(max_iter==1000){
  ntrain=6
  train_size_stepsize=c(10,10,10,10,30,30)
}else{
  train_size_stepsize=rep(10,10)
  ntrain=10
}


data_set_size=nrow(X)
RMSES=LogS=matrix(nrow=ntrain,ncol=5)
colnames(RMSES)=c("standard","standard 2","standard 3","standard 4","fundamental")
colnames(LogS)=c("standard","standard 2","standard 3","standard 4","fundamental")


for( i in 1:ntrain){

  train_ind=c(train_ind,sample((1:data_set_size)[-train_ind],
                               train_size_stepsize[i]))
  test_ind=(1:data_set_size)[-train_ind]
  
  

  Xtr=X[train_ind,]
  Xte=X[test_ind,]
  Ytr=Y[train_ind,]
  Yte=Y[test_ind,]

  
  
  initial_par=rep(1,6)
  
  
  opt_par=Adam(function(x) {log_likelihood_standard(
                            Ytr,Xtr, x[1], x[2], x[3], x[4], x[5],x[6])},
                function(x) {grad_standard(
                            Xtr, Ytr, x[1], x[2], x[3], x[4], x[5], x[6])},
               initial_par,lr=lr,max_iter=max_iter)
  
  
  
  Ktetr=cross_cov_mat3d(Xte,Xtr,opt_par[1],
                           opt_par[2],
                           opt_par[3],
                           opt_par[4],
                           opt_par[5],
                           opt_par[6])
  
 Ktetr_trtr_inv=Ktetr%*%inv(cross_cov_mat3d(Xtr,Xtr,opt_par[1],
                                                   opt_par[2],
                                                   opt_par[3],
                                                   opt_par[4],
                                                   opt_par[5],
                                                   opt_par[6])+diag(jit,nrow=3*nrow(Xtr)))
  
  
  
  posterior_mean=Ktetr_trtr_inv%*%c(Ytr[,1],Ytr[,2],Ytr[,3])
  
  
  RMSES[i,1]=mean(apply((cbind(posterior_mean[1:(length(posterior_mean)/3)],
                               posterior_mean[(length(posterior_mean)/3+1):
                                              (2*length(posterior_mean)/3)],
                               posterior_mean[(2*length(posterior_mean)/3+1):
                                              (length(posterior_mean))])-Yte)^2,
                           1,sum))^.5
  
  
  posterior_cov= cross_cov_mat3d(Xte,Xte,opt_par[1],
                                    opt_par[2],
                                    opt_par[3],
                                    opt_par[4],opt_par[5],
                                    opt_par[6])-
   Ktetr_trtr_inv%*%t(Ktetr)
  
  
  LogS[i,1]=(t(c(Yte[,1],Yte[,2],Yte[,3])-posterior_mean)%*%
                      inv(posterior_cov+diag(jit,3*nrow(Xte))
                      )%*%
                      (c(Yte[,1],Yte[,2],Yte[,3])-posterior_mean)
                    +determinant(posterior_cov+diag(jit,3*nrow(Xte))
                    )$modulus[1]+
                      log(2*pi)*nrow(Xte))/nrow(Xte)
  
  
  
  opt_par=Adam(function(x) {
    log_likelihood_standard2(
      Ytr,Xtr, x[1], x[2], x[3], x[4], x[5],x[6])
  },function(x) {
    grad_standard2(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5],
                   x[6])
  },initial_par,lr=lr,max_iter=max_iter)
  
  
  
  Ktetr=cross_cov_mat3d_2(Xte,Xtr,opt_par[1],
                           opt_par[2],
                           opt_par[3],
                           opt_par[4],
                           opt_par[5],
                           opt_par[6])
  
 Ktetr_trtr_inv=Ktetr%*%inv(cross_cov_mat3d_2(Xtr,Xtr,opt_par[1],
                                                   opt_par[2],
                                                   opt_par[3],
                                                   opt_par[4],
                                                   opt_par[5],
                                                   opt_par[6])+
                                   diag(jit,nrow=3*nrow(Xtr)))
  
  
  
  posterior_mean=Ktetr_trtr_inv%*%c(Ytr[,1],Ytr[,2],Ytr[,3])
  
  
  RMSES[i,2]=mean(apply((
                   cbind(posterior_mean[1:(length(posterior_mean)/3)],
                         posterior_mean[(length(posterior_mean)/3+1):
                                            (2*length(posterior_mean)/3)],
                         posterior_mean[(2*length(posterior_mean)/3+1):
                                        (length(posterior_mean))])-
                   Yte)^2,1,sum))^.5
  
  
  posterior_cov= cross_cov_mat3d_2(Xte,Xte,opt_par[1],
                                    opt_par[2],
                                    opt_par[3],
                                    opt_par[4],opt_par[5],
                                    opt_par[6])-
                  Ktetr_trtr_inv%*%t(Ktetr)
  
  
  LogS[i,2]=(t(c(Yte[,1],Yte[,2],Yte[,3])-posterior_mean)%*%
                      inv(posterior_cov+diag(jit,3*nrow(Xte))
                      )%*%
                      (c(Yte[,1],Yte[,2],Yte[,3])-posterior_mean)
                    +determinant(posterior_cov+diag(jit,3*nrow(Xte))
                    )$modulus[1]+
                      log(2*pi)*nrow(Xte))/nrow(Xte)
  
  opt_par=Adam(function(x) {
    log_likelihood_standard3(
      Ytr,Xtr, x[1], x[2], x[3], x[4], x[5],x[6])
  },function(x) {
    grad_standard3(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5],
                   x[6])
  },initial_par,lr=lr,max_iter=max_iter)
  
  
  
  Ktetr=cross_cov_mat3d_3(Xte,Xtr,opt_par[1],
                           opt_par[2],
                           opt_par[3],
                           opt_par[4],
                           opt_par[5],
                           opt_par[6])
  
 Ktetr_trtr_inv=Ktetr%*%inv(cross_cov_mat3d_3(Xtr,Xtr,opt_par[1],
                                                   opt_par[2],
                                                   opt_par[3],
                                                   opt_par[4],
                                                   opt_par[5],
                                                   opt_par[6])+
                                   diag(jit,nrow=3*nrow(Xtr)))
  
  
  
  posterior_mean=Ktetr_trtr_inv%*%c(Ytr[,1],Ytr[,2],Ytr[,3])
  
  
  RMSES[i,3]=mean(apply((
                   cbind(posterior_mean[1:(length(posterior_mean)/3)],
                         posterior_mean[(length(posterior_mean)/3+1):
                                            (2*length(posterior_mean)/3)],
                         posterior_mean[(2*length(posterior_mean)/3+1):
                                        (length(posterior_mean))])-
                   Yte)^2,1,sum))^.5
  
  
  
  
  posterior_cov= cross_cov_mat3d_3(Xte,Xte,opt_par[1],
                                    opt_par[2],
                                    opt_par[3],
                                    opt_par[4],opt_par[5],
                                    opt_par[6])-
   Ktetr_trtr_inv%*%t(Ktetr)
  
  
  LogS[i,3]=(t(c(Yte[,1],Yte[,2],Yte[,3])-posterior_mean)%*%
                      inv(posterior_cov+diag(jit,3*nrow(Xte))
                      )%*%
                      (c(Yte[,1],Yte[,2],Yte[,3])-posterior_mean)
                    +determinant(posterior_cov+diag(jit,3*nrow(Xte))
                    )$modulus[1]+
                      log(2*pi)*nrow(Xte))/nrow(Xte)
  
  
  
  opt_par=Adam(function(x) {
    log_likelihood_standard4(
      Ytr,Xtr, x[1], x[2], x[3], x[4], x[5],x[6])
  },function(x) {
    grad_standard4(Xtr, Ytr, x[1], x[2], x[3], x[4], x[5],
                   x[6])
  },initial_par,lr=lr,max_iter=max_iter)
  
  
  
  Ktetr=cross_cov_mat3d_4(Xte,Xtr,opt_par[1],
                           opt_par[2],
                           opt_par[3],
                           opt_par[4],
                           opt_par[5],
                           opt_par[6])
  
 Ktetr_trtr_inv=Ktetr%*%inv(cross_cov_mat3d_4(Xtr,Xtr,opt_par[1],
                                                   opt_par[2],
                                                   opt_par[3],
                                                   opt_par[4],
                                                   opt_par[5],
                                                   opt_par[6])+
                                   diag(jit,nrow=3*nrow(Xtr)))
  
  
  
  posterior_mean=Ktetr_trtr_inv%*%c(Ytr[,1],Ytr[,2],Ytr[,3])
  
  
  RMSES[i,4]=mean(apply((
                   cbind(posterior_mean[1:(length(posterior_mean)/3)],
                         posterior_mean[(length(posterior_mean)/3+1):
                                            (2*length(posterior_mean)/3)],
                         posterior_mean[(2*length(posterior_mean)/3+1):
                                        (length(posterior_mean))])-
                   Yte)^2,1,sum))^.5
  
  
  
  
  posterior_cov= cross_cov_mat3d_4(Xte,Xte,opt_par[1],
                                    opt_par[2],
                                    opt_par[3],
                                    opt_par[4],opt_par[5],
                                    opt_par[6])-
   Ktetr_trtr_inv%*%t(Ktetr)
  
  
  LogS[i,4]=(t(c(Yte[,1],Yte[,2],Yte[,3])-posterior_mean)%*%
                      inv(posterior_cov+diag(jit,3*nrow(Xte))
                      )%*%
                      (c(Yte[,1],Yte[,2],Yte[,3])-posterior_mean)
                    +determinant(posterior_cov+diag(jit,3*nrow(Xte))
                    )$modulus[1]+
                      log(2*pi)*nrow(Xte))/nrow(Xte)
  
  
  
  opt_par=Adam(function(x) {log_likelihood_fund(
                            Ytr,Xtr, x[1], x[2], x[3], x[4], x[5],x[6])},
               function(x) {grad_fund(
                            Xtr, Ytr, x[1], x[2], x[3], x[4], x[5],x[6])},
               initial_par,lr=lr,max_iter = max_iter)
  
  
  
  Ktetr_eq=cross_fund_cov_mat(Xte,Xtr,opt_par[1],
                              opt_par[2],
                              opt_par[3],
                              opt_par[4],
                              opt_par[5],
                              opt_par[6])
  
  Ktetr_trtr_inv_eq=Ktetr_eq%*%inv(cross_fund_cov_mat(Xtr,Xtr,opt_par[1],
                                                      opt_par[2],
                                                      opt_par[3],
                                                      opt_par[4],
                                                      opt_par[5],
                                                      opt_par[6])+
                                        diag(jit,nrow=3*nrow(Xtr)))
  
  
  
  posterior_mean_eq=Ktetr_trtr_inv_eq%*%c(Ytr[,1],Ytr[,2],Ytr[,3])
  
  
  RMSES[i,5]=mean(apply((cbind(
                 posterior_mean_eq[1:(length(posterior_mean_eq)/3)],
                 posterior_mean_eq[(length(posterior_mean_eq)/3+1):
                                     (2*length(posterior_mean_eq)/3)],
                 posterior_mean_eq[(2*length(posterior_mean_eq)/3+1):
                              (length(posterior_mean_eq))])-Yte)^2,1,sum))^.5
  
  
  posterior_cov_eq= cross_fund_cov_mat(Xte,Xte,
                                       opt_par[1],
                                       opt_par[2],
                                       opt_par[3],
                                       opt_par[4],
                                       opt_par[5],
                                       opt_par[6])-
                    Ktetr_trtr_inv_eq%*%t(Ktetr_eq)
  
  
  LogS[i,5]=(t(c(Yte[,1],Yte[,2],Yte[,3])-posterior_mean_eq)%*%
             
               inv(posterior_cov_eq+diag(jit,3*nrow(Xte)))%*%
                        (c(Yte[,1],Yte[,2],Yte[,3])-posterior_mean_eq)+
               
               determinant(posterior_cov_eq+diag(jit,3*nrow(Xte)))$modulus[1]+
                        log(2*pi)*nrow(Xte))/nrow(Xte)
  

}

Learning curve for one sampling round

x_values=c(20,30,40,50,80,110)

par(mfrow=c(1,2),mar = c(4, 3, 2, 1) + 0.1, oma = c(0, 0, 0, 0), xpd = NA)

col=c(brewer.pal(4,"Dark2")[1],4,brewer.pal(3,"Paired")[1:2],
      brewer.pal(4,"Dark2")[2])

plot(
  x_values, log(RMSES[,1]), type = "b", pch = 19, col = col[1],
  ylim = c(-7,0),
  xlab = "Training size", ylab = "", main = "Mean log(RMSE) Scores",
  xaxt = "n", yaxt = "n", bty = "n", cex.axis = 1.2, cex.lab = 1.2, cex.main = 1.5,lwd=1.5
)
for (y in log(RMSES[,1])) {
      segments(x0 = min(x_values), y0 = y, x1 = max(x_values), y1 = y, col = "gray", lty = "dotted")
}

for( i in 2:ncol(RMSES)){
  lines(x_values,log( RMSES[,i ]), type = "b", pch = 19,lwd=1.5,col = col[i])
  for (k in x_values) {
        segments(x0 = k, y0 = min(log(RMSES)), x1 = k, y1 =max(log(RMSES)), col = "gray", lty = "dotted")
    }
    
    
   for (y in log(RMSES[,i])) {
      segments(x0 = min(x_values), y0 = y, x1 = max(x_values), y1 = y, col = "gray", lty = "dotted")
}
 
  
}


axis(1, at = x_values, labels = x_values, cex.axis = 1.2,lwd=1.5)
axis(2, las = 1, cex.axis = 1.2,lwd=1.5)

plot(
  x_values, LogS[,1], type = "b", pch = 19, col = col[1],
  ylim = c(-40,0),
  xlab = "Training size", ylab = "", main = "Mean log(RMSE) Scores",
  xaxt = "n", yaxt = "n", bty = "n", cex.axis = 1.2, cex.lab = 1.2, cex.main = 1.5,lwd=1.5
)
for (y in LogS[,1]) {
      segments(x0 = min(x_values), y0 = y, x1 = max(x_values), y1 = y, col = "gray", lty = "dotted")
}

for( i in 2:ncol(RMSES)){
  lines(x_values,LogS[,i ], type = "b", pch = 19,lwd=1.5,col = col[i])
  for (k in x_values) {
        segments(x0 = k, y0 = min(LogS), x1 = k, y1 =max(LogS), col = "gray", lty = "dotted")
    }
    
    
for (y in LogS[,i]) {
      segments(x0 = min(x_values), y0 = y, x1 = max(x_values), y1 = y, col = "gray", lty = "dotted")
}
 
}
legend("topright",c(expression("K","K"[2],"K"[3],"K"[4],"K"[Pi]
                               )),lty=1,pch=19,bty="n",col=col,ncol=5,
       lwd=2,cex=1.2)


axis(1, at = x_values, labels = x_values, cex.axis = 1.2,lwd=1.5)
axis(2, las = 1, cex.axis = 1.2,lwd=1.5)